author     Gabe Black <gblack@eecs.umich.edu>    2006-10-12 10:58:45 -0400
committer  Gabe Black <gblack@eecs.umich.edu>    2006-10-12 10:58:45 -0400
commit     866cfaf9dc596d8547e14bc2133fb962776572a7 (patch)
tree       19b82a8021533e8bc2e35f14fb0b6a0440756814
parent     6a31898a88a9ecced399ccf50636831c21d4a75e (diff)
parent     78aec04b660544ea7af80d76912b4422c4426602 (diff)
download   gem5-866cfaf9dc596d8547e14bc2133fb962776572a7.tar.xz
Merge zizzer.eecs.umich.edu:/bk/newmem
into zeep.eecs.umich.edu:/home/gblack/m5/newmem
--HG--
extra : convert_revision : 30b2475ba034550376455e1bc0e52e19a200fd5a
171 files changed, 5677 insertions(+), 5149 deletions(-)
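The configs/example/fs.py hunks below add checkpoint restore: the script scans the checkpoint directory for cpt.<tick> subdirectories and restores the <N>th one found. A minimal standalone sketch of that scan, with the patch's m5 calls stubbed out (find_checkpoint is an illustrative name, not part of the patch; only m5.restoreCheckpoint is real gem5 API):

import os
import re

def find_checkpoint(cptdir, n):
    # Return the path of the n-th (1-based) cpt.<tick> directory,
    # mirroring the logic the patch adds to fs.py.
    if not os.path.isdir(cptdir):
        raise RuntimeError("checkpoint dir %s does not exist!" % cptdir)
    # same regex the patch uses; note it keeps listdir() order (unsorted)
    expr = re.compile('cpt.([0-9]*)')
    cpts = [m.group(1) for m in map(expr.match, os.listdir(cptdir)) if m]
    if n > len(cpts):
        raise RuntimeError('Checkpoint %d not found' % n)
    return "/".join([cptdir, "cpt.%s" % cpts[n - 1]])

# usage mirroring the patch:
#   m5.restoreCheckpoint(root, find_checkpoint(cptdir, options.checkpoint))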
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index 67a1e5735..470dc8867 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -30,7 +30,6 @@ import m5 from m5 import makeList from m5.objects import * from Benchmarks import * -from FullO3Config import * class CowIdeDisk(IdeDisk): image = CowDiskImage(child=RawDiskImage(read_only=True), diff --git a/configs/example/fs.py b/configs/example/fs.py index 71c5961ef..460fb68fb 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -49,10 +49,14 @@ parser.add_option("--dual", action="store_true", parser.add_option("-b", "--benchmark", action="store", type="string", dest="benchmark", help="Specify the benchmark to run. Available benchmarks: %s"\ - % DefinedBenchmarks) + % DefinedBenchmarks) parser.add_option("--etherdump", action="store", type="string", dest="etherdump", - help="Specify the filename to dump a pcap capture of the ethernet" - "traffic") + help="Specify the filename to dump a pcap capture of the" \ + "ethernet traffic") +parser.add_option("--checkpoint_dir", action="store", type="string", + help="Place all checkpoints in this absolute directory") +parser.add_option("-c", "--checkpoint", action="store", type="int", + help="restore from checkpoint <N>") (options, args) = parser.parse_args() @@ -61,8 +65,8 @@ if args: sys.exit(1) if options.detailed: - cpu = DetailedO3CPU() - cpu2 = DetailedO3CPU() + cpu = DerivO3CPU() + cpu2 = DerivO3CPU() mem_mode = 'timing' elif options.timing: cpu = TimingSimpleCPU() @@ -75,6 +79,8 @@ else: cpu.clock = '2GHz' cpu2.clock = '2GHz' +cpu.cpu_id = 0 +cpu2.cpu_id = 0 if options.benchmark: if options.benchmark not in Benchmarks: @@ -111,6 +117,31 @@ else: m5.instantiate(root) +if options.checkpoint: + from os.path import isdir + from os import listdir, getcwd + import re + if options.checkpoint_dir: + cptdir = options.checkpoint_dir + else: + cptdir = getcwd() + + if not isdir(cptdir): + m5.panic("checkpoint dir %s does not exist!" 
% cptdir) + + dirs = listdir(cptdir) + expr = re.compile('cpt.([0-9]*)') + cpts = [] + for dir in dirs: + match = expr.match(dir) + if match: + cpts.append(match.group(1)) + + if options.checkpoint > len(cpts): + m5.panic('Checkpoint %d not found' % options.checkpoint) + + m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint - 1]])) + if options.maxtick: maxtick = options.maxtick elif options.maxtime: @@ -123,7 +154,14 @@ else: exit_event = m5.simulate(maxtick) while exit_event.getCause() == "checkpoint": - m5.checkpoint(root, "cpt.%d") - exit_event = m5.simulate(maxtick - m5.curTick()) + if options.checkpoint_dir: + m5.checkpoint(root, "/".join([options.checkpoint_dir, "cpt.%d"])) + else: + m5.checkpoint(root, "cpt.%d") + + if maxtick == -1: + exit_event = m5.simulate(maxtick) + else: + exit_event = m5.simulate(maxtick - m5.curTick()) print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() diff --git a/configs/example/se.py b/configs/example/se.py index de8b6c890..6a941b9da 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -34,7 +34,6 @@ import m5 from m5.objects import * import os, optparse, sys m5.AddToPath('../common') -from FullO3Config import * parser = optparse.OptionParser() @@ -86,11 +85,12 @@ if options.detailed: if options.timing: cpu = TimingSimpleCPU() elif options.detailed: - cpu = DetailedO3CPU() + cpu = DerivO3CPU() else: cpu = AtomicSimpleCPU() cpu.workload = process +cpu.cpu_id = 0 system = System(cpu = cpu, physmem = PhysicalMemory(), diff --git a/src/arch/SConscript b/src/arch/SConscript index 59cea6211..dda1dea53 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -50,6 +50,7 @@ isa_switch_hdrs = Split(''' arguments.hh faults.hh isa_traits.hh + locked_mem.hh process.hh regfile.hh stacktrace.hh diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index 4fc9da3f3..5bd19b677 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -701,7 +701,7 @@ decode OPCODE default Unknown::unknown() { 0x00: decode PALFUNC { format EmulatedCallPal { 0x00: halt ({{ - exitSimLoop(curTick, "halt instruction encountered"); + exitSimLoop("halt instruction encountered"); }}, IsNonSpeculative); 0x83: callsys({{ xc->syscall(R0); diff --git a/src/arch/alpha/locked_mem.hh b/src/arch/alpha/locked_mem.hh new file mode 100644 index 000000000..368ea2895 --- /dev/null +++ b/src/arch/alpha/locked_mem.hh @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __ARCH_ALPHA_LOCKED_MEM_HH__ +#define __ARCH_ALPHA_LOCKED_MEM_HH__ + +/** + * @file + * + * ISA-specific helper functions for locked memory accesses. + */ + +#include "arch/isa_traits.hh" +#include "base/misc.hh" +#include "mem/request.hh" + + +namespace AlphaISA +{ +template <class XC> +inline void +handleLockedRead(XC *xc, Request *req) +{ + xc->setMiscReg(Lock_Addr_DepTag, req->getPaddr() & ~0xf); + xc->setMiscReg(Lock_Flag_DepTag, true); +} + + +template <class XC> +inline bool +handleLockedWrite(XC *xc, Request *req) +{ + if (req->isUncacheable()) { + // Funky Turbolaser mailbox access...don't update + // result register (see stq_c in decoder.isa) + req->setScResult(2); + } else { + // standard store conditional + bool lock_flag = xc->readMiscReg(Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(Lock_Addr_DepTag); + if (!lock_flag || (req->getPaddr() & ~0xf) != lock_addr) { + // Lock flag not set or addr mismatch in CPU; + // don't even bother sending to memory system + req->setScResult(0); + xc->setMiscReg(Lock_Flag_DepTag, false); + // the rest of this code is not architectural; + // it's just a debugging aid to help detect + // livelock by warning on long sequences of failed + // store conditionals + int stCondFailures = xc->readStCondFailures(); + stCondFailures++; + xc->setStCondFailures(stCondFailures); + if (stCondFailures % 100000 == 0) { + warn("cpu %d: %d consecutive " + "store conditional failures\n", + xc->readCpuId(), stCondFailures); + } + + // store conditional failed already, so don't issue it to mem + return false; + } + } + + return true; +} + + +} // namespace AlphaISA + +#endif diff --git a/src/arch/mips/locked_mem.hh b/src/arch/mips/locked_mem.hh new file mode 100644 index 000000000..363cf1e90 --- /dev/null +++ b/src/arch/mips/locked_mem.hh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __ARCH_MIPS_LOCKED_MEM_HH__ +#define __ARCH_MIPS_LOCKED_MEM_HH__ + +/** + * @file + * + * ISA-specific helper functions for locked memory accesses. + */ + +#include "mem/request.hh" + + +namespace MipsISA +{ +template <class XC> +inline void +handleLockedRead(XC *xc, Request *req) +{ +} + + +template <class XC> +inline bool +handleLockedWrite(XC *xc, Request *req) +{ + return true; +} + + +} // namespace MipsISA + +#endif diff --git a/src/arch/sparc/locked_mem.hh b/src/arch/sparc/locked_mem.hh new file mode 100644 index 000000000..291b2f422 --- /dev/null +++ b/src/arch/sparc/locked_mem.hh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __ARCH_SPARC_LOCKED_MEM_HH__ +#define __ARCH_SPARC_LOCKED_MEM_HH__ + +/** + * @file + * + * ISA-specific helper functions for locked memory accesses. 
+ */ + +#include "mem/request.hh" + + +namespace SparcISA +{ +template <class XC> +inline void +handleLockedRead(XC *xc, Request *req) +{ +} + + +template <class XC> +inline bool +handleLockedWrite(XC *xc, Request *req) +{ + return true; +} + + +} // namespace SparcISA + +#endif diff --git a/src/arch/sparc/system.cc b/src/arch/sparc/system.cc index 63cbbe057..ef6443d17 100644 --- a/src/arch/sparc/system.cc +++ b/src/arch/sparc/system.cc @@ -152,10 +152,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SparcSystem) Param<std::string> readfile; Param<unsigned int> init_param; - Param<bool> bin; - VectorParam<std::string> binned_fns; - Param<bool> bin_int; - END_DECLARE_SIM_OBJECT_PARAMS(SparcSystem) BEGIN_INIT_SIM_OBJECT_PARAMS(SparcSystem) @@ -173,10 +169,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SparcSystem) INIT_PARAM_DFLT(readfile, "file to read startup script from", ""), INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0), INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34), - INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10), - INIT_PARAM_DFLT(bin, "is this system to be binned", false), - INIT_PARAM(binned_fns, "functions to be broken down and binned"), - INIT_PARAM_DFLT(bin_int, "is interrupt code binned seperately?", true) + INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10) END_INIT_SIM_OBJECT_PARAMS(SparcSystem) @@ -196,9 +189,6 @@ CREATE_SIM_OBJECT(SparcSystem) p->readfile = readfile; p->system_type = system_type; p->system_rev = system_rev; - p->bin = bin; - p->binned_fns = binned_fns; - p->bin_int = bin_int; return new SparcSystem(p); } diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 8e8153b68..f871ce35f 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -58,6 +58,7 @@ baseFlags = [ 'BusAddrRanges', 'BusBridge', 'Cache', + 'CachePort', 'Chains', 'Checker', 'Clock', @@ -112,6 +113,7 @@ baseFlags = [ 'IdeDisk', 'InstExec', 'Interrupt', + 'LLSC', 'LSQ', 'LSQUnit', 'Loader', diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 2bb9a2399..5771a7904 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -158,6 +158,7 @@ if 'O3CPU' in env['CPU_MODELS']: o3/scoreboard.cc o3/store_set.cc ''') + sources += Split('memtest/memtest.cc') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') else: diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 513dd7c55..ea4b03bf2 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,6 +41,7 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" +#include "sim/sim_exit.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" @@ -125,8 +126,9 @@ BaseCPU::BaseCPU(Params *p) // if (p->max_insts_any_thread != 0) for (int i = 0; i < number_of_threads; ++i) - new SimLoopExitEvent(comInstEventQueue[i], p->max_insts_any_thread, - "a thread reached the max instruction count"); + schedExitSimLoop("a thread reached the max instruction count", + p->max_insts_any_thread, 0, + comInstEventQueue[i]); if (p->max_insts_all_threads != 0) { // allocate & initialize shared downcounter: each event will @@ -150,8 +152,9 @@ BaseCPU::BaseCPU(Params *p) // if (p->max_loads_any_thread != 0) for (int i = 0; i < number_of_threads; ++i) - new SimLoopExitEvent(comLoadEventQueue[i], p->max_loads_any_thread, - "a thread reached the max load count"); + schedExitSimLoop("a thread reached the max load count", + p->max_loads_any_thread, 0, + comLoadEventQueue[i]); if (p->max_loads_all_threads != 0) { // 
allocate & initialize shared downcounter: each event will diff --git a/src/cpu/base.hh b/src/cpu/base.hh index e02527371..75e0d86af 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -140,8 +140,8 @@ class BaseCPU : public MemObject bool functionTrace; Tick functionTraceStart; System *system; -#if FULL_SYSTEM int cpu_id; +#if FULL_SYSTEM Tick profile; #endif Tick progress_interval; diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index f2109e88d..d6cdff5c5 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -193,7 +193,7 @@ BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags) // note this is a local, not BaseDynInst::fault Fault trans_fault = cpu->translateDataReadReq(req); - if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) { + if (trans_fault == NoFault && !(req->isUncacheable())) { // It's a valid address to cacheable space. Record key MemReq // parameters so we can generate another one just like it for // the timing access without calling translate() again (which diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 1540a6b94..f6d56eef6 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -175,7 +175,7 @@ CheckerCPU::read(Addr addr, T &data, unsigned flags) pkt->dataStatic(&data); - if (!(memReq->getFlags() & UNCACHEABLE)) { + if (!(memReq->isUncacheable())) { // Access memory to see if we have the same data dcachePort->sendFunctional(pkt); } else { @@ -251,9 +251,9 @@ CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) // This is because the LSQ would have to be snooped in the CPU to // verify this data. if (unverifiedReq && - !(unverifiedReq->getFlags() & UNCACHEABLE) && - (!(unverifiedReq->getFlags() & LOCKED) || - ((unverifiedReq->getFlags() & LOCKED) && + !(unverifiedReq->isUncacheable()) && + (!(unverifiedReq->isLocked()) || + ((unverifiedReq->isLocked()) && unverifiedReq->getScResult() == 1))) { T inst_data; /* diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index 8c0186dae..b2806d40b 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -133,7 +133,7 @@ class CheckerThreadContext : public ThreadContext void takeOverFrom(ThreadContext *oldContext) { actualTC->takeOverFrom(oldContext); - checkerTC->takeOverFrom(oldContext); + checkerTC->copyState(oldContext); } void regStats(const std::string &name) { actualTC->regStats(name); } diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 7ea9eaefc..f42f0f8e2 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -38,86 +38,158 @@ #include "base/misc.hh" #include "base/statistics.hh" -#include "cpu/simple_thread.hh" +//#include "cpu/simple_thread.hh" #include "cpu/memtest/memtest.hh" -#include "mem/cache/base_cache.hh" +//#include "mem/cache/base_cache.hh" +//#include "mem/physical.hh" #include "sim/builder.hh" #include "sim/sim_events.hh" #include "sim/stats.hh" +#include "mem/packet.hh" +#include "mem/request.hh" +#include "mem/port.hh" +#include "mem/mem_object.hh" using namespace std; -using namespace TheISA; int TESTER_ALLOCATOR=0; +bool +MemTest::CpuPort::recvTiming(Packet *pkt) +{ + memtest->completeRequest(pkt); + return true; +} + +Tick +MemTest::CpuPort::recvAtomic(Packet *pkt) +{ + panic("MemTest doesn't expect recvAtomic callback!"); + return curTick; +} + +void +MemTest::CpuPort::recvFunctional(Packet *pkt) +{ + //Do nothing if we see one come through + if (curTick != 0)//Supress warning durring 
initialization + warn("Functional Writes not implemented in MemTester\n"); + //Need to find any response values that intersect and update + return; +} + +void +MemTest::CpuPort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("MemTest doesn't expect recvStatusChange callback!"); +} + +void +MemTest::CpuPort::recvRetry() +{ + memtest->doRetry(); +} + +void +MemTest::sendPkt(Packet *pkt) { + if (atomic) { + cachePort.sendAtomic(pkt); + pkt->makeAtomicResponse(); + completeRequest(pkt); + } + else if (!cachePort.sendTiming(pkt)) { + accessRetry = true; + retryPkt = pkt; + } + +} + MemTest::MemTest(const string &name, - MemInterface *_cache_interface, - FunctionalMemory *main_mem, - FunctionalMemory *check_mem, +// MemInterface *_cache_interface, +// PhysicalMemory *main_mem, +// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, - unsigned _percentCopies, +// unsigned _percentCopies, unsigned _percentUncacheable, unsigned _progressInterval, unsigned _percentSourceUnaligned, unsigned _percentDestUnaligned, Addr _traceAddr, - Counter _max_loads) - : SimObject(name), + Counter _max_loads, + bool _atomic) + : MemObject(name), tickEvent(this), - cacheInterface(_cache_interface), - mainMem(main_mem), - checkMem(check_mem), + cachePort("test", this), + funcPort("functional", this), + retryPkt(NULL), +// mainMem(main_mem), +// checkMem(check_mem), size(_memorySize), percentReads(_percentReads), - percentCopies(_percentCopies), +// percentCopies(_percentCopies), percentUncacheable(_percentUncacheable), progressInterval(_progressInterval), nextProgressMessage(_progressInterval), percentSourceUnaligned(_percentSourceUnaligned), percentDestUnaligned(percentDestUnaligned), - maxLoads(_max_loads) + maxLoads(_max_loads), + atomic(_atomic) { vector<string> cmd; cmd.push_back("/bin/ls"); vector<string> null_vec; - thread = new SimpleThread(NULL, 0, mainMem, 0); - - blockSize = cacheInterface->getBlockSize(); - blockAddrMask = blockSize - 1; - traceBlockAddr = blockAddr(_traceAddr); - - //setup data storage with interesting values - uint8_t *data1 = new uint8_t[size]; - uint8_t *data2 = new uint8_t[size]; - uint8_t *data3 = new uint8_t[size]; - memset(data1, 1, size); - memset(data2, 2, size); - memset(data3, 3, size); + // thread = new SimpleThread(NULL, 0, NULL, 0, mainMem); curTick = 0; + // Needs to be masked off once we know the block size. + traceBlockAddr = _traceAddr; baseAddr1 = 0x100000; baseAddr2 = 0x400000; uncacheAddr = 0x800000; - // set up intial memory contents here - mainMem->prot_write(baseAddr1, data1, size); - checkMem->prot_write(baseAddr1, data1, size); - mainMem->prot_write(baseAddr2, data2, size); - checkMem->prot_write(baseAddr2, data2, size); - mainMem->prot_write(uncacheAddr, data3, size); - checkMem->prot_write(uncacheAddr, data3, size); - - delete [] data1; - delete [] data2; - delete [] data3; - // set up counters noResponseCycles = 0; numReads = 0; tickEvent.schedule(0); id = TESTER_ALLOCATOR++; + if (TESTER_ALLOCATOR > 8) + panic("False sharing memtester only allows up to 8 testers"); + + accessRetry = false; +} + +Port * +MemTest::getPort(const std::string &if_name, int idx) +{ + if (if_name == "functional") + return &funcPort; + else if (if_name == "test") + return &cachePort; + else + panic("No Such Port\n"); +} + +void +MemTest::init() +{ + // By the time init() is called, the ports should be hooked up. 
+ blockSize = cachePort.peerBlockSize(); + blockAddrMask = blockSize - 1; + traceBlockAddr = blockAddr(traceBlockAddr); + + // set up intial memory contents here + + cachePort.memsetBlob(baseAddr1, 1, size); + funcPort.memsetBlob(baseAddr1, 1, size); + cachePort.memsetBlob(baseAddr2, 2, size); + funcPort.memsetBlob(baseAddr2, 2, size); + cachePort.memsetBlob(uncacheAddr, 3, size); + funcPort.memsetBlob(uncacheAddr, 3, size); } static void @@ -132,23 +204,31 @@ printData(ostream &os, uint8_t *data, int nbytes) } void -MemTest::completeRequest(MemReqPtr &req, uint8_t *data) +MemTest::completeRequest(Packet *pkt) { + MemTestSenderState *state = + dynamic_cast<MemTestSenderState *>(pkt->senderState); + + uint8_t *data = state->data; + uint8_t *pkt_data = pkt->getPtr<uint8_t>(); + Request *req = pkt->req; + //Remove the address from the list of outstanding - std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr); + std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->getPaddr()); assert(removeAddr != outstandingAddrs.end()); outstandingAddrs.erase(removeAddr); - switch (req->cmd) { - case Read: - if (memcmp(req->data, data, req->size) != 0) { - cerr << name() << ": on read of 0x" << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + switch (pkt->cmd) { + case Packet::ReadResp: + + if (memcmp(pkt_data, data, pkt->getSize()) != 0) { + cerr << name() << ": on read of 0x" << hex << req->getPaddr() + << " (0x" << hex << blockAddr(req->getPaddr()) << ")" << "@ cycle " << dec << curTick << ", cache returns 0x"; - printData(cerr, req->data, req->size); + printData(cerr, pkt_data, pkt->getSize()); cerr << ", expected 0x"; - printData(cerr, data, req->size); + printData(cerr, data, pkt->getSize()); cerr << endl; fatal(""); } @@ -163,13 +243,13 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data) } if (numReads >= maxLoads) - SimExit(curTick, "Maximum number of loads reached!"); + exitSimLoop("Maximum number of loads reached!"); break; - case Write: + case Packet::WriteResp: numWritesStat++; break; - +/* case Copy: //Also remove dest from outstanding list removeAddr = outstandingAddrs.find(req->dest); @@ -177,36 +257,37 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data) outstandingAddrs.erase(removeAddr); numCopiesStat++; break; - +*/ default: panic("invalid command"); } - if (blockAddr(req->paddr) == traceBlockAddr) { + if (blockAddr(req->getPaddr()) == traceBlockAddr) { cerr << name() << ": completed " - << (req->cmd.isWrite() ? "write" : "read") + << (pkt->isWrite() ? 
"write" : "read") << " access of " - << dec << req->size << " bytes at address 0x" - << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + << dec << pkt->getSize() << " bytes at address 0x" + << hex << req->getPaddr() + << " (0x" << hex << blockAddr(req->getPaddr()) << ")" << ", value = 0x"; - printData(cerr, req->data, req->size); + printData(cerr, pkt_data, pkt->getSize()); cerr << " @ cycle " << dec << curTick; cerr << endl; } noResponseCycles = 0; + delete state; delete [] data; + delete pkt->req; + delete pkt; } - void MemTest::regStats() { using namespace Stats; - numReadsStat .name(name() + ".num_reads") .desc("number of read accesses completed") @@ -234,7 +315,7 @@ MemTest::tick() fatal(""); } - if (cacheInterface->isBlocked()) { + if (accessRetry) { return; } @@ -248,30 +329,30 @@ MemTest::tick() //If we aren't doing copies, use id as offset, and do a false sharing //mem tester - if (percentCopies == 0) { - //We can eliminate the lower bits of the offset, and then use the id - //to offset within the blks - offset &= ~63; //Not the low order bits - offset += id; - access_size = 0; - } + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset &= ~63; //Not the low order bits + offset += id; + access_size = 0; - MemReqPtr req = new MemReq(); + Request *req = new Request(); + uint32_t flags = 0; + Addr paddr; if (cacheable < percentUncacheable) { - req->flags |= UNCACHEABLE; - req->paddr = uncacheAddr + offset; + flags |= UNCACHEABLE; + paddr = uncacheAddr + offset; } else { - req->paddr = ((base) ? baseAddr1 : baseAddr2) + offset; + paddr = ((base) ? baseAddr1 : baseAddr2) + offset; } - // bool probe = (random() % 2 == 1) && !req->isUncacheable(); + //bool probe = (random() % 2 == 1) && !req->isUncacheable(); bool probe = false; - req->size = 1 << access_size; - req->data = new uint8_t[req->size]; - req->paddr &= ~(req->size - 1); - req->time = curTick; - req->xc = thread->getProxy(); + paddr &= ~((1 << access_size) - 1); + req->setPhys(paddr, 1 << access_size, flags); + req->setThreadContext(id,0); + + uint8_t *result = new uint8_t[8]; if (cmd < percentReads) { // read @@ -279,60 +360,75 @@ MemTest::tick() //For now we only allow one outstanding request per addreess per tester //This means we assume CPU does write forwarding to reads that alias something //in the cpu store buffer. - if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; - else outstandingAddrs.insert(req->paddr); + if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(paddr); + + // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin + funcPort.readBlob(req->getPaddr(), result, req->getSize()); - req->cmd = Read; - uint8_t *result = new uint8_t[8]; - checkMem->access(Read, req->paddr, result, req->size); - if (blockAddr(req->paddr) == traceBlockAddr) { + if (blockAddr(paddr) == traceBlockAddr) { cerr << name() << ": initiating read " << ((probe) ? 
"probe of " : "access of ") - << dec << req->size << " bytes from addr 0x" - << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + << dec << req->getSize() << " bytes from addr 0x" + << hex << paddr + << " (0x" << hex << blockAddr(paddr) << ")" << " at cycle " << dec << curTick << endl; } + + Packet *pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + pkt->dataDynamicArray(new uint8_t[req->getSize()]); + MemTestSenderState *state = new MemTestSenderState(result); + pkt->senderState = state; + if (probe) { - cacheInterface->probeAndUpdate(req); - completeRequest(req, result); + cachePort.sendFunctional(pkt); + completeRequest(pkt); } else { - req->completionEvent = new MemCompleteEvent(req, result, this); - cacheInterface->access(req); +// req->completionEvent = new MemCompleteEvent(req, result, this); + sendPkt(pkt); } - } else if (cmd < (100 - percentCopies)){ + } else { // write //For now we only allow one outstanding request per addreess per tester //This means we assume CPU does write forwarding to reads that alias something //in the cpu store buffer. - if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; - else outstandingAddrs.insert(req->paddr); + if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(paddr); - req->cmd = Write; - memcpy(req->data, &data, req->size); - checkMem->access(Write, req->paddr, req->data, req->size); - if (blockAddr(req->paddr) == traceBlockAddr) { +/* + if (blockAddr(req->getPaddr()) == traceBlockAddr) { cerr << name() << ": initiating write " << ((probe)?"probe of ":"access of ") - << dec << req->size << " bytes (value = 0x"; - printData(cerr, req->data, req->size); + << dec << req->getSize() << " bytes (value = 0x"; + printData(cerr, data_pkt->getPtr(), req->getSize()); cerr << ") to addr 0x" - << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + << hex << req->getPaddr() + << " (0x" << hex << blockAddr(req->getPaddr()) << ")" << " at cycle " << dec << curTick << endl; } +*/ + Packet *pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + uint8_t *pkt_data = new uint8_t[req->getSize()]; + pkt->dataDynamicArray(pkt_data); + memcpy(pkt_data, &data, req->getSize()); + MemTestSenderState *state = new MemTestSenderState(result); + pkt->senderState = state; + + funcPort.writeBlob(req->getPaddr(), pkt_data, req->getSize()); + if (probe) { - cacheInterface->probeAndUpdate(req); - completeRequest(req, NULL); + cachePort.sendFunctional(pkt); + completeRequest(pkt); } else { - req->completionEvent = new MemCompleteEvent(req, NULL, this); - cacheInterface->access(req); +// req->completionEvent = new MemCompleteEvent(req, NULL, this); + sendPkt(pkt); } - } else { + } +/* else { // copy unsigned source_align = random() % 100; unsigned dest_align = random() % 100; @@ -369,56 +465,51 @@ MemTest::tick() << " (0x" << hex << blockAddr(dest) << ")" << " at cycle " << dec << curTick << endl; - } + }* cacheInterface->access(req); uint8_t result[blockSize]; checkMem->access(Read, source, &result, blockSize); checkMem->access(Write, dest, &result, blockSize); } +*/ } - void -MemCompleteEvent::process() -{ - tester->completeRequest(req, data); - delete this; -} - - -const char * -MemCompleteEvent::description() +MemTest::doRetry() { - return "memory access completion"; + if (cachePort.sendTiming(retryPkt)) { + accessRetry = false; + retryPkt = NULL; + } } - BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) - SimObjectParam<BaseCache *> cache; - 
SimObjectParam<FunctionalMemory *> main_mem; - SimObjectParam<FunctionalMemory *> check_mem; +// SimObjectParam<BaseCache *> cache; +// SimObjectParam<PhysicalMemory *> main_mem; +// SimObjectParam<PhysicalMemory *> check_mem; Param<unsigned> memory_size; Param<unsigned> percent_reads; - Param<unsigned> percent_copies; +// Param<unsigned> percent_copies; Param<unsigned> percent_uncacheable; Param<unsigned> progress_interval; Param<unsigned> percent_source_unaligned; Param<unsigned> percent_dest_unaligned; Param<Addr> trace_addr; Param<Counter> max_loads; + Param<bool> atomic; END_DECLARE_SIM_OBJECT_PARAMS(MemTest) BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) - INIT_PARAM(cache, "L1 cache"), - INIT_PARAM(main_mem, "hierarchical memory"), - INIT_PARAM(check_mem, "check memory"), +// INIT_PARAM(cache, "L1 cache"), +// INIT_PARAM(main_mem, "hierarchical memory"), +// INIT_PARAM(check_mem, "check memory"), INIT_PARAM(memory_size, "memory size"), INIT_PARAM(percent_reads, "target read percentage"), - INIT_PARAM(percent_copies, "target copy percentage"), +// INIT_PARAM(percent_copies, "target copy percentage"), INIT_PARAM(percent_uncacheable, "target uncacheable percentage"), INIT_PARAM(progress_interval, "progress report interval (in accesses)"), INIT_PARAM(percent_source_unaligned, @@ -426,18 +517,19 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) INIT_PARAM(percent_dest_unaligned, "percent of copy dest address that are unaligned"), INIT_PARAM(trace_addr, "address to trace"), - INIT_PARAM(max_loads, "terminate when we have reached this load count") + INIT_PARAM(max_loads, "terminate when we have reached this load count"), + INIT_PARAM(atomic, "Is the tester testing atomic mode (or timing)") END_INIT_SIM_OBJECT_PARAMS(MemTest) CREATE_SIM_OBJECT(MemTest) { - return new MemTest(getInstanceName(), cache->getInterface(), main_mem, - check_mem, memory_size, percent_reads, percent_copies, + return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/ + /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/ percent_uncacheable, progress_interval, percent_source_unaligned, percent_dest_unaligned, - trace_addr, max_loads); + trace_addr, max_loads, atomic); } REGISTER_SIM_OBJECT("MemTest", MemTest) diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 42fb235db..5de41f0d8 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -35,31 +35,36 @@ #include <set> #include "base/statistics.hh" -#include "mem/functional/functional.hh" -#include "mem/mem_interface.hh" +//#include "mem/functional/functional.hh" +//#include "mem/mem_interface.hh" #include "sim/eventq.hh" #include "sim/sim_exit.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" -class ThreadContext; -class MemTest : public SimObject +class Packet; +class MemTest : public MemObject { public: MemTest(const std::string &name, - MemInterface *_cache_interface, - FunctionalMemory *main_mem, - FunctionalMemory *check_mem, +// MemInterface *_cache_interface, +// PhysicalMemory *main_mem, +// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, - unsigned _percentCopies, +// unsigned _percentCopies, unsigned _percentUncacheable, unsigned _progressInterval, unsigned _percentSourceUnaligned, unsigned _percentDestUnaligned, Addr _traceAddr, - Counter _max_loads); + Counter _max_loads, + bool _atomic); + + virtual void init(); // register statistics virtual void regStats(); @@ -69,6 +74,8 @@ class MemTest : public SimObject // 
main simulation loop (one cycle) void tick(); + virtual Port *getPort(const std::string &if_name, int idx = -1); + protected: class TickEvent : public Event { @@ -82,16 +89,62 @@ class MemTest : public SimObject }; TickEvent tickEvent; + class CpuPort : public Port + { + + MemTest *memtest; + + public: + + CpuPort(const std::string &_name, MemTest *_memtest) + : Port(_name), memtest(_memtest) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); - MemInterface *cacheInterface; - FunctionalMemory *mainMem; - FunctionalMemory *checkMem; - SimpleThread *thread; + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void recvRetry(); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } + }; + + CpuPort cachePort; + CpuPort funcPort; + + class MemTestSenderState : public Packet::SenderState + { + public: + /** Constructor. */ + MemTestSenderState(uint8_t *_data) + : data(_data) + { } + + // Hold onto data pointer + uint8_t *data; + }; + +// Request *dataReq; + Packet *retryPkt; +// MemInterface *cacheInterface; +// PhysicalMemory *mainMem; +// PhysicalMemory *checkMem; +// SimpleThread *thread; + + bool accessRetry; unsigned size; // size of testing memory region unsigned percentReads; // target percentage of read accesses - unsigned percentCopies; // target percentage of copy accesses +// unsigned percentCopies; // target percentage of copy accesses unsigned percentUncacheable; int id; @@ -123,34 +176,21 @@ class MemTest : public SimObject uint64_t numReads; uint64_t maxLoads; + + bool atomic; + Stats::Scalar<> numReadsStat; Stats::Scalar<> numWritesStat; Stats::Scalar<> numCopiesStat; // called by MemCompleteEvent::process() - void completeRequest(MemReqPtr &req, uint8_t *data); + void completeRequest(Packet *pkt); - friend class MemCompleteEvent; -}; + void sendPkt(Packet *pkt); + void doRetry(); -class MemCompleteEvent : public Event -{ - MemReqPtr req; - uint8_t *data; - MemTest *tester; - - public: - - MemCompleteEvent(MemReqPtr &_req, uint8_t *_data, MemTest *_tester) - : Event(&mainEventQueue), - req(_req), data(_data), tester(_tester) - { - } - - void process(); - - virtual const char *description(); + friend class MemCompleteEvent; }; #endif // __CPU_MEMTEST_MEMTEST_HH__ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c80e4d8c1..ecf6ed632 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -342,12 +342,6 @@ DefaultCommit<Impl>::drain() { drainPending = true; - // If it's already drained, return true. 
- if (rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalDrained(); - return true; - } - return false; } @@ -1218,16 +1212,16 @@ DefaultCommit<Impl>::skidInsert() for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) { DynInstPtr inst = fromRename->insts[inst_num]; - int tid = inst->threadNumber; if (!inst->isSquashed()) { DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", - "skidBuffer.\n", inst->readPC(), inst->seqNum, tid); + "skidBuffer.\n", inst->readPC(), inst->seqNum, + inst->threadNumber); skidBuffer.push(inst); } else { DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " "squashed, skipping.\n", - inst->readPC(), inst->seqNum, tid); + inst->readPC(), inst->seqNum, inst->threadNumber); } } } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7386dfadd..4c9a8e91f 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -88,7 +88,7 @@ FullO3CPU<Impl>::TickEvent::description() template <class Impl> FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() - : Event(&mainEventQueue, CPU_Tick_Pri) + : Event(&mainEventQueue, CPU_Switch_Pri) { } @@ -135,7 +135,8 @@ void FullO3CPU<Impl>::DeallocateContextEvent::process() { cpu->deactivateThread(tid); - cpu->removeThread(tid); + if (remove) + cpu->removeThread(tid); } template <class Impl> @@ -191,7 +192,11 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) deferRegistration(params->deferRegistration), numThreads(number_of_threads) { - _status = Idle; + if (!deferRegistration) { + _status = Running; + } else { + _status = Idle; + } checker = NULL; @@ -304,6 +309,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) tid, bindRegs); + + activateThreadEvent[tid].init(tid, this); + deallocateContextEvent[tid].init(tid, this); } rename.setRenameMap(renameMap); @@ -447,13 +455,16 @@ FullO3CPU<Impl>::tick() if (!tickEvent.scheduled()) { if (_status == SwitchedOut || getState() == SimObject::Drained) { + DPRINTF(O3CPU, "Switched out!\n"); // increment stat lastRunningCycle = curTick; - } else if (!activityRec.active()) { + } else if (!activityRec.active() || _status == Idle) { + DPRINTF(O3CPU, "Idle!\n"); lastRunningCycle = curTick; timesIdled++; } else { tickEvent.schedule(curTick + cycles(1)); + DPRINTF(O3CPU, "Scheduling next tick!\n"); } } @@ -512,6 +523,8 @@ FullO3CPU<Impl>::activateThread(unsigned tid) list<unsigned>::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid); + if (isActive == activeThreads.end()) { DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", tid); @@ -528,6 +541,8 @@ FullO3CPU<Impl>::deactivateThread(unsigned tid) list<unsigned>::iterator thread_it = find(activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid); + if (thread_it != activeThreads.end()) { DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); @@ -548,7 +563,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay) activateThread(tid); } - if(lastActivatedCycle < curTick) { + if (lastActivatedCycle < curTick) { scheduleTickEvent(delay); // Be sure to signal that there's some activity so the CPU doesn't @@ -563,17 +578,20 @@ FullO3CPU<Impl>::activateContext(int tid, int delay) } template <class Impl> -void -FullO3CPU<Impl>::deallocateContext(int tid, int delay) +bool +FullO3CPU<Impl>::deallocateContext(int tid, bool remove, int delay) { // Schedule removal of thread data from CPU if (delay){ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " "on 
cycle %d\n", tid, curTick + cycles(delay)); - scheduleDeallocateContextEvent(tid, delay); + scheduleDeallocateContextEvent(tid, remove, delay); + return false; } else { deactivateThread(tid); - removeThread(tid); + if (remove) + removeThread(tid); + return true; } } @@ -582,8 +600,9 @@ void FullO3CPU<Impl>::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - deactivateThread(tid); - if (activeThreads.size() == 0) + bool deallocated = deallocateContext(tid, false, 1); + // If this was the last thread then unschedule the tick event. + if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0) unscheduleTickEvent(); _status = Idle; } @@ -594,7 +613,7 @@ FullO3CPU<Impl>::haltContext(int tid) { //For now, this is the same as deallocate DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); - deallocateContext(tid, 1); + deallocateContext(tid, true, 1); } template <class Impl> @@ -682,10 +701,17 @@ FullO3CPU<Impl>::removeThread(unsigned tid) assert(iew.ldstQueue.getCount(tid) == 0); // Reset ROB/IQ/LSQ Entries + + // Commented out for now. This should be possible to do by + // telling all the pipeline stages to drain first, and then + // checking until the drain completes. Once the pipeline is + // drained, call resetEntries(). - 10-09-06 ktlim +/* if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } +*/ } @@ -824,7 +850,9 @@ template <class Impl> void FullO3CPU<Impl>::resume() { +#if FULL_SYSTEM assert(system->getMemoryMode() == System::Timing); +#endif fetch.resume(); decode.resume(); rename.resume(); @@ -935,6 +963,25 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) } if (!tickEvent.scheduled()) tickEvent.schedule(curTick); + + Port *peer; + Port *icachePort = fetch.getIcachePort(); + if (icachePort->getPeer() == NULL) { + peer = oldCPU->getPort("icache_port")->getPeer(); + icachePort->setPeer(peer); + } else { + peer = icachePort->getPeer(); + } + peer->setPeer(icachePort); + + Port *dcachePort = iew.getDcachePort(); + if (dcachePort->getPeer() == NULL) { + peer = oldCPU->getPort("dcache_port")->getPeer(); + dcachePort->setPeer(peer); + } else { + peer = dcachePort->getPeer(); + } + peer->setPeer(dcachePort); } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index dcdcd1fe6..fe510519c 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU class DeallocateContextEvent : public Event { private: - /** Number of Thread to Activate */ + /** Number of Thread to deactivate */ int tid; + /** Should the thread be removed from the CPU? */ + bool remove; + /** Pointer to the CPU. */ FullO3CPU<Impl> *cpu; @@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU /** Processes the event, calling activateThread() on the CPU. */ void process(); + /** Sets whether the thread should also be removed from the CPU. */ + void setRemove(bool _remove) { remove = _remove; } + /** Returns the description of the event. */ const char *description(); }; /** Schedule cpu to deallocate thread context.*/ - void scheduleDeallocateContextEvent(int tid, int delay) + void scheduleDeallocateContextEvent(int tid, bool remove, int delay) { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) @@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU void suspendContext(int tid); /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. 
+ * Possibly Remove Thread Context from CPU. */ - void deallocateContext(int tid, int delay = 1); + bool deallocateContext(int tid, bool remove, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; - /** Pointer to the icache interface. */ - MemInterface *icacheInterface; - /** Pointer to the dcache interface. */ - MemInterface *dcacheInterface; - /** Whether or not the CPU should defer its registration. */ bool deferRegistration; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 1a2ca32a4..280bf0e71 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -96,7 +96,7 @@ class DefaultFetch /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles setting fetch to the * proper status to start fetching. */ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 3c47c39fa..072580af7 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -63,7 +63,7 @@ template<class Impl> void DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + warn("Default fetch doesn't update it's state from a functional call."); } template<class Impl> @@ -599,7 +599,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || - memReq[tid]->flags & UNCACHEABLE) { + memReq[tid]->isUncacheable()) { DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " "misspeculating path)!", memReq[tid]->paddr); @@ -623,6 +623,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Now do the timing access to see whether or not the instruction // exists within the cache. if (!icachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + fault = TheISA::genMachineCheckFault(); + delete mem_req; + memReq[tid] = NULL; + } assert(retryPkt == NULL); assert(retryTid == -1); DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b2baae296..ba5260fe2 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -600,6 +600,11 @@ template<class Impl> void DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) { + // This function should not be called after writebackInsts in a + // single cycle. That will cause problems with an instruction + // being added to the queue to commit without being processed by + // writebackInsts prior to being sent to commit. + // First check the time slot that this instruction will write // to. If there are free write ports at the time, then go ahead // and write the instruction to that time. If there are not, @@ -1286,6 +1291,7 @@ DefaultIEW<Impl>::executeInsts() } else if (fault != NoFault) { // If the instruction faulted, then we need to send it along to commit // without the instruction completing. + DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum); // Send this instruction to commit, also make sure iew stage // realizes there is activity. 
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 190734dc2..6b12d75b4 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -311,7 +311,7 @@ class LSQ { /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles writing back and * completing the load or store that has returned from diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 2bbab71f0..7b7d1eb8e 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -46,7 +46,7 @@ template <class Impl> void LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) { - panic("O3CPU doesn't expect recvFunctional callback!"); + warn("O3CPU doesn't update things on a recvFunctional."); } template <class Impl> diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 90d1a3d53..11a02e7c7 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -492,7 +492,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // A bit of a hackish way to get uncached accesses to work only if they're // at the head of the LSQ and are ready to commit (at the head of the ROB // too). - if (req->getFlags() & UNCACHEABLE && + if (req->isUncacheable() && (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; @@ -509,7 +509,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) load_idx, store_idx, storeHead, req->getPaddr()); #if FULL_SYSTEM - if (req->getFlags() & LOCKED) { + if (req->isLocked()) { cpu->lockAddr = req->getPaddr(); cpu->lockFlag = true; } @@ -626,18 +626,30 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(load_inst->memData); - - LSQSenderState *state = new LSQSenderState; - state->isLoad = true; - state->idx = load_idx; - state->inst = load_inst; - data_pkt->senderState = state; - // if we the cache is not blocked, do cache access if (!lsq->cacheBlocked()) { + PacketPtr data_pkt = + new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(load_inst->memData); + + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = load_inst; + data_pkt->senderState = state; + if (!dcachePort->sendTiming(data_pkt)) { + Packet::Result result = data_pkt->result; + + // Delete state and data packet because a load retry + // initiates a pipeline restart; it does not retry. + delete state; + delete data_pkt; + + if (result == Packet::BadAddress) { + return TheISA::genMachineCheckFault(); + } + // If the access didn't succeed, tell the LSQ by setting // the retry thread id. 
lsq->setRetryTid(lsqID); @@ -664,16 +676,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) return NoFault; } - if (data_pkt->result != Packet::Success) { - DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - load_inst->seqNum); - } else { - DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); - DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", - load_inst->seqNum); - } - return NoFault; } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 98bea74fb..3f9db912f 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -416,7 +416,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) // realizes there is activity. // Mark it as executed unless it is an uncached load that // needs to hit the head of commit. - if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) { + if (!(inst->req->isUncacheable()) || inst->isAtCommit()) { inst->setExecuted(); } iewStage->instToCommit(inst); @@ -608,21 +608,30 @@ LSQUnit<Impl>::writebackStores() DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), + storeWBIdx, inst->readPC(), req->getPaddr(), *(inst->memData), - storeQueue[storeWBIdx].inst->seqNum); + inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->getFlags() & LOCKED) { - if (req->getFlags() & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->setScResult(2); } else { if (cpu->lockFlag) { req->setScResult(1); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.", + inst->seqNum); } else { req->setScResult(0); // Hack: Instantly complete this store. - completeDataAccess(data_pkt); +// completeDataAccess(data_pkt); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " + "Instantly completing it.\n", + inst->seqNum); + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); + wb->schedule(curTick + 1); + delete state; + completeStore(storeWBIdx); incrStIdx(storeWBIdx); continue; } @@ -633,7 +642,13 @@ LSQUnit<Impl>::writebackStores() } if (!dcachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } // Need to handle becoming blocked on a store. 
+ DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + "retry later\n", + inst->seqNum); isStoreBlocked = true; ++lsqCacheBlocked; assert(retryPkt == NULL); @@ -880,6 +895,9 @@ LSQUnit<Impl>::recvRetry() assert(retryPkt != NULL); if (dcachePort->sendTiming(retryPkt)) { + if (retryPkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } storePostSend(retryPkt); retryPkt = NULL; isStoreBlocked = false; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index 25e1db21c..2bc194d53 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -165,14 +165,14 @@ template <class Impl> void O3ThreadContext<Impl>::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", - getThreadNum()); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n", + getThreadNum(), delay); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid(), delay); + cpu->deallocateContext(thread->readTid(), true, delay); } template <class Impl> diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh index 9bab6a964..8debd277d 100644 --- a/src/cpu/ozone/back_end.hh +++ b/src/cpu/ozone/back_end.hh @@ -493,7 +493,7 @@ BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx) } */ /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Read"); */ return LSQ.read(req, data, load_idx); @@ -534,7 +534,7 @@ BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx) *res = memReq->result; */ /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Write"); */ return LSQ.write(req, data, store_idx); diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh index ac3218c02..4078699fe 100644 --- a/src/cpu/ozone/back_end_impl.hh +++ b/src/cpu/ozone/back_end_impl.hh @@ -1256,7 +1256,7 @@ BackEnd<Impl>::executeInsts() // ++iewExecStoreInsts; - if (!(inst->req->flags & LOCKED)) { + if (!(inst->req->isLocked())) { inst->setExecuted(); instToCommit(inst); diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index 8c5be9424..70ec1d101 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -455,12 +455,12 @@ class OzoneCPU : public BaseCPU { #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } #endif - if (req->flags & LOCKED) { + if (req->isLocked()) { lockAddrList.insert(req->paddr); lockFlag = true; } @@ -489,10 +489,10 @@ class OzoneCPU : public BaseCPU ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? 
[RGD] @@ -532,8 +532,8 @@ class OzoneCPU : public BaseCPU #endif - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->result = 2; } else { if (this->lockFlag) { diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index 5ffd3666e..59cf9785c 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -92,7 +92,7 @@ class FrontEnd /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles setting fetch to the * proper status to start fetching. */ diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 9a00aefbf..9eff8619d 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -59,7 +59,7 @@ template<class Impl> void FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt) { - panic("FrontEnd doesn't expect recvFunctional callback!"); + warn("FrontEnd doesn't update state from functional calls"); } template<class Impl> @@ -493,7 +493,7 @@ FrontEnd<Impl>::fetchCacheLine() if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq->paddr) || - memReq->flags & UNCACHEABLE) { + memReq->isUncacheable()) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq->paddr); diff --git a/src/cpu/ozone/inorder_back_end.hh b/src/cpu/ozone/inorder_back_end.hh index ffdba2f6c..76eef6fad 100644 --- a/src/cpu/ozone/inorder_back_end.hh +++ b/src/cpu/ozone/inorder_back_end.hh @@ -231,7 +231,7 @@ InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags) } } /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Read"); */ return fault; @@ -243,7 +243,7 @@ Fault InorderBackEnd<Impl>::read(MemReqPtr &req, T &data) { #if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } @@ -291,7 +291,7 @@ InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) if (res && (fault == NoFault)) *res = memReq->result; /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Write"); */ return fault; @@ -306,10 +306,10 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data) ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? 
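// Throughout this patch raw flag tests (req->flags & LOCKED,
// req->flags & UNCACHEABLE) give way to accessors. A plausible shape for
// them, assuming thin wrappers over the same flag bits (the real
// definitions live in src/mem/request.hh and may differ):
class RequestSketch
{
    uint32_t flags;
  public:
    bool isUncacheable() const { return (flags & UNCACHEABLE) != 0; }
    bool isLocked() const { return (flags & LOCKED) != 0; }
};
// Besides reading better, this keeps the flag encoding private to Request,
// so later changes to the flag word do not ripple through every CPU model.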
[RGD] @@ -391,7 +391,7 @@ InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx) } /* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) + if (!dcacheInterface && (req->isUncacheable())) recordEvent("Uncached Read"); */ return NoFault; @@ -455,8 +455,8 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) } } /* - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; } else { @@ -469,7 +469,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) *res = req->result; */ /* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) + if (!dcacheInterface && (req->isUncacheable())) recordEvent("Uncached Write"); */ return NoFault; diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh index 38c1c09a2..056c79521 100644 --- a/src/cpu/ozone/lsq_unit.hh +++ b/src/cpu/ozone/lsq_unit.hh @@ -426,7 +426,7 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). // @todo: Fix uncached accesses. - if (req->flags & UNCACHEABLE && + if (req->isUncacheable() && (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { return TheISA::genMachineCheckFault(); diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh index ee0804036..c46eb90be 100644 --- a/src/cpu/ozone/lsq_unit_impl.hh +++ b/src/cpu/ozone/lsq_unit_impl.hh @@ -577,7 +577,7 @@ OzoneLSQ<Impl>::writebackStores() MemAccessResult result = dcacheInterface->access(req); //@todo temp fix for LL/SC (works fine for 1 CPU) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->result=1; panic("LL/SC! oh no no support!!!"); } @@ -596,7 +596,7 @@ OzoneLSQ<Impl>::writebackStores() Event *wb = NULL; /* typename IEW::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port transaction. req->result=0; wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, @@ -630,7 +630,7 @@ OzoneLSQ<Impl>::writebackStores() // DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", // storeQueue[storeWBIdx].inst->seqNum); - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port transaction. req->result=1; typename BackEnd::LdWritebackEvent *wb = diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 6640a9f34..9b93ce74f 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -260,7 +260,7 @@ class OzoneLWLSQ { virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } virtual bool recvTiming(PacketPtr pkt); @@ -507,7 +507,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). // @todo: Fix uncached accesses.
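// The getDeviceAddressRanges() changes in this patch make CPU-side ports
// advertise a catch-all snoop range instead of an empty list, so the bus
// now forwards coherence traffic to them. Assuming RangeSize(base, size)
// builds [base, base + size - 1], a size of -1 wraps to the top of the
// address space:
AddrRangeList snoop;
snoop.push_back(RangeSize(0, -1));   // snoop the entire address space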
- if (req->getFlags() & UNCACHEABLE && + if (req->isUncacheable() && (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", @@ -659,7 +659,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) return NoFault; } - if (req->getFlags() & LOCKED) { + if (req->isLocked()) { cpu->lockFlag = true; } diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 4c96ad149..e523712da 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -72,7 +72,7 @@ template <class Impl> void OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) { - panic("O3CPU doesn't expect recvFunctional callback!"); + warn("O3CPU doesn't update things on a recvFunctional"); } template <class Impl> @@ -394,7 +394,7 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst) // Actually probably want the oldest faulting load if (load_fault != NoFault) { DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum); - if (!(inst->req->getFlags() & UNCACHEABLE && !inst->isAtCommit())) { + if (!(inst->req->isUncacheable() && !inst->isAtCommit())) { inst->setExecuted(); } // Maybe just set it as can commit here, although that might cause @@ -605,8 +605,8 @@ OzoneLWLSQ<Impl>::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->getFlags() & LOCKED) { - if (req->getFlags() & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->setScResult(2); } else { if (cpu->lockFlag) { @@ -663,7 +663,7 @@ OzoneLWLSQ<Impl>::writebackStores() if (result != MA_HIT && dcacheInterface->doEvents()) { store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { + if (req->isLocked()) { wb = new typename BackEnd::LdWritebackEvent(inst, be); store_event->wbEvent = wb; @@ -690,7 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores() // DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", // inst->seqNum); - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port // transaction in the 21264, but that might be // hard to accomplish in this model. diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 7ba1b7df1..490be20ae 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -28,6 +28,7 @@ * Authors: Steve Reinhardt */ +#include "arch/locked_mem.hh" #include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/simple/atomic.hh" @@ -93,7 +94,7 @@ AtomicSimpleCPU::init() bool AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt) { - panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); return true; } @@ -107,7 +108,8 @@ AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt) void AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt) { - panic("AtomicSimpleCPU doesn't expect recvFunctional callback!"); + //No internal storage to update, just return + return; } void @@ -133,20 +135,19 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) { _status = Idle; - // @todo fix me and get the real cpu id & thread number!!! 
ifetch_req = new Request(); - ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); - data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); - data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } @@ -161,9 +162,11 @@ AtomicSimpleCPU::serialize(ostream &os) { SimObject::State so_state = SimObject::getState(); SERIALIZE_ENUM(so_state); + Status _status = status(); + SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); - BaseSimpleCPU::serialize(os); } void @@ -171,8 +174,9 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { SimObject::State so_state; UNSERIALIZE_ENUM(so_state); - tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + UNSERIALIZE_ENUM(_status); BaseSimpleCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } void @@ -253,29 +257,36 @@ template <class T> Fault AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) { - data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + // use the CPU's statically allocated read request and packet objects + Request *req = data_read_req; + Packet *pkt = data_read_pkt; + + req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { traceData->setAddr(addr); } // translate to physical address - Fault fault = thread->translateDataReadReq(data_read_req); + Fault fault = thread->translateDataReadReq(req); // Now do the access. if (fault == NoFault) { - data_read_pkt->reinitFromRequest(); + pkt->reinitFromRequest(); - dcache_latency = dcachePort.sendAtomic(data_read_pkt); + dcache_latency = dcachePort.sendAtomic(pkt); dcache_access = true; - assert(data_read_pkt->result == Packet::Success); - data = data_read_pkt->get<T>(); + assert(pkt->result == Packet::Success); + data = pkt->get<T>(); + if (req->isLocked()) { + TheISA::handleLockedRead(thread, req); + } } // This will need a new way to tell if it has a dcache attached. - if (data_read_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Read"); return fault; @@ -328,33 +339,52 @@ template <class T> Fault AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { - data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + // use the CPU's statically allocated write request and packet objects + Request *req = data_write_req; + Packet *pkt = data_write_pkt; + + req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { traceData->setAddr(addr); } // translate to physical address - Fault fault = thread->translateDataWriteReq(data_write_req); + Fault fault = thread->translateDataWriteReq(req); // Now do the access. 
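// A hedged sketch of what the new TheISA::handleLockedRead()/
// handleLockedWrite() hooks amount to on Alpha, inferred from the inline
// LOCKED-flag code they replace elsewhere in this patch; the real
// arch/alpha/locked_mem.hh may differ (e.g. by also checking the address):
template <class XC>
void
handleLockedRead(XC *xc, Request *req)
{
    // LDx_L: remember the locked address and raise the lock flag.
    xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->getPaddr());
    xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
}

template <class XC>
bool
handleLockedWrite(XC *xc, Request *req)
{
    if (!req->isLocked())
        return true;                       // ordinary store: always access

    // STx_C: succeed only while the lock flag is still set, and record
    // the outcome so completeAcc() can hand it back to the program.
    bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
    req->setScResult(lock_flag ? 1 : 0);
    return lock_flag;                      // false suppresses the access
}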
if (fault == NoFault) { - data = htog(data); - data_write_pkt->reinitFromRequest(); - data_write_pkt->dataStatic(&data); + bool do_access = true; // flag to suppress cache access - dcache_latency = dcachePort.sendAtomic(data_write_pkt); - dcache_access = true; + if (req->isLocked()) { + do_access = TheISA::handleLockedWrite(thread, req); + } + + if (do_access) { + data = htog(data); + pkt->reinitFromRequest(); + pkt->dataStatic(&data); - assert(data_write_pkt->result == Packet::Success); + dcache_latency = dcachePort.sendAtomic(pkt); + dcache_access = true; - if (res && data_write_req->getFlags() & LOCKED) { - *res = data_write_req->getScResult(); + assert(pkt->result == Packet::Success); + } + + if (req->isLocked()) { + uint64_t scResult = req->getScResult(); + if (scResult != 0) { + // clear failure counter + thread->setStCondFailures(0); + } + if (res) { + *res = req->getScResult(); + } } } // This will need a new way to tell if it's hooked up to a cache or not. - if (data_write_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -467,11 +497,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Tick> progress_interval; SimObjectParam<MemObject *> mem; SimObjectParam<System *> system; + Param<int> cpu_id; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - Param<int> cpu_id; Param<Tick> profile; #else SimObjectParam<Process *> workload; @@ -500,11 +530,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(progress_interval, "Progress interval"), INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "processes to run"), @@ -538,11 +568,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->simulate_stalls = simulate_stalls; params->mem = mem; params->system = system; + params->cpu_id = cpu_id; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->cpu_id = cpu_id; params->profile = profile; #else params->process = workload; diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index b602af558..52afd76ef 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -104,9 +104,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - }; + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } + }; CpuPort icachePort; CpuPort dcachePort; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 03ee27e04..33f673cbc 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -28,6 +28,7 @@ * Authors: Steve Reinhardt */ +#include "arch/locked_mem.hh" #include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/simple/timing.hh" @@ -73,7 +74,8 @@ TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt) void TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt) { - panic("TimingSimpleCPU doesn't expect recvFunctional callback!"); + //No internal storage to update, just return + return; } void @@ -94,12 +96,14 @@ TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t) } TimingSimpleCPU::TimingSimpleCPU(Params *p) - : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock) + : BaseSimpleCPU(p), icachePort(this, p->clock),
dcachePort(this, p->clock), + cpu_id(p->cpu_id) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; + previousTick = 0; changeState(SimObject::Running); } @@ -158,6 +162,7 @@ TimingSimpleCPU::resume() assert(system->getMemoryMode() == System::Timing); changeState(SimObject::Running); + previousTick = curTick; } void @@ -165,6 +170,7 @@ TimingSimpleCPU::switchOut() { assert(status() == Running || status() == Idle); _status = SwitchedOut; + numCycles += curTick - previousTick; // If we've been scheduled to resume but are then told to switch out, // we'll need to cancel it. @@ -187,6 +193,27 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) break; } } + + if (_status != Running) { + _status = Idle; + } + + Port *peer; + if (icachePort.getPeer() == NULL) { + peer = oldCPU->getPort("icache_port")->getPeer(); + icachePort.setPeer(peer); + } else { + peer = icachePort.getPeer(); + } + peer->setPeer(&icachePort); + + if (dcachePort.getPeer() == NULL) { + peer = oldCPU->getPort("dcache_port")->getPeer(); + dcachePort.setPeer(peer); + } else { + peer = dcachePort.getPeer(); + } + peer->setPeer(&dcachePort); } @@ -227,35 +254,35 @@ template <class T> Fault TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { - // need to fill in CPU & thread IDs here - Request *data_read_req = new Request(); - data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE - data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + Request *req = + new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), + cpu_id, /* thread ID */ 0); if (traceData) { - traceData->setAddr(data_read_req->getVaddr()); + traceData->setAddr(req->getVaddr()); } // translate to physical address - Fault fault = thread->translateDataReadReq(data_read_req); + Fault fault = thread->translateDataReadReq(req); // Now do the access. if (fault == NoFault) { - Packet *data_read_pkt = - new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); - data_read_pkt->dataDynamic<T>(new T); + Packet *pkt = + new Packet(req, Packet::ReadReq, Packet::Broadcast); + pkt->dataDynamic<T>(new T); - if (!dcachePort.sendTiming(data_read_pkt)) { + if (!dcachePort.sendTiming(pkt)) { _status = DcacheRetry; - dcache_pkt = data_read_pkt; + dcache_pkt = pkt; } else { _status = DcacheWaitResponse; + // memory system takes ownership of packet dcache_pkt = NULL; } } // This will need a new way to tell if it has a dcache attached. - if (data_read_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Read"); return fault; @@ -308,31 +335,39 @@ template <class T> Fault TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { - // need to fill in CPU & thread IDs here - Request *data_write_req = new Request(); - data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE - data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + Request *req = + new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), + cpu_id, /* thread ID */ 0); // translate to physical address - Fault fault = thread->translateDataWriteReq(data_write_req); + Fault fault = thread->translateDataWriteReq(req); + // Now do the access. 
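// The timing-mode read()/write() paths above share one send/retry
// discipline; a condensed sketch (trySend is an illustrative name, not a
// TimingSimpleCPU method):
bool
trySend(Port &port, Packet *&owned_pkt)
{
    if (!port.sendTiming(owned_pkt)) {
        // Peer is busy: keep ownership and wait for recvRetry()
        // (_status = DcacheRetry in the code above).
        return false;
    }
    // The memory system now owns the packet; null the pointer so it
    // cannot be reused or deleted here (_status = DcacheWaitResponse).
    owned_pkt = NULL;
    return true;
}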
if (fault == NoFault) { - Packet *data_write_pkt = - new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); - data_write_pkt->allocate(); - data_write_pkt->set(data); + assert(dcache_pkt == NULL); + dcache_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + dcache_pkt->allocate(); + dcache_pkt->set(data); - if (!dcachePort.sendTiming(data_write_pkt)) { - _status = DcacheRetry; - dcache_pkt = data_write_pkt; - } else { - _status = DcacheWaitResponse; - dcache_pkt = NULL; + bool do_access = true; // flag to suppress cache access + + if (req->isLocked()) { + do_access = TheISA::handleLockedWrite(thread, req); + } + + if (do_access) { + if (!dcachePort.sendTiming(dcache_pkt)) { + _status = DcacheRetry; + } else { + _status = DcacheWaitResponse; + // memory system takes ownership of packet + dcache_pkt = NULL; + } } } // This will need a new way to tell if it's hooked up to a cache or not. - if (data_write_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -392,9 +427,8 @@ TimingSimpleCPU::fetch() { checkForInterrupts(); - // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); - ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0); Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); @@ -414,6 +448,9 @@ TimingSimpleCPU::fetch() // fetch fault: advance directly to next instruction (fault handler) advanceInst(fault); } + + numCycles += curTick - previousTick; + previousTick = curTick; } @@ -444,6 +481,9 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) delete pkt->req; delete pkt; + numCycles += curTick - previousTick; + previousTick = curTick; + if (getState() == SimObject::Draining) { completeDrain(); return; @@ -453,12 +493,20 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) { // load or store: just send to dcache Fault fault = curStaticInst->initiateAcc(this, traceData); - if (fault == NoFault) { - // successfully initiated access: instruction will - // complete in dcache response callback - assert(_status == DcacheWaitResponse); + if (_status != Running) { + // instruction will complete in dcache response callback + assert(_status == DcacheWaitResponse || _status == DcacheRetry); + assert(fault == NoFault); } else { - // fault: complete now to invoke fault handler + if (fault == NoFault) { + // early fail on store conditional: complete now + assert(dcache_pkt != NULL); + fault = curStaticInst->completeAcc(dcache_pkt, this, + traceData); + delete dcache_pkt->req; + delete dcache_pkt; + dcache_pkt = NULL; + } postExecute(); advanceInst(fault); } @@ -479,8 +527,7 @@ TimingSimpleCPU::IcachePort::ITickEvent::process() bool TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) { - // These next few lines could be replaced with something faster - // who knows what though + // delay processing of returned data until next CPU clock edge Tick time = pkt->req->getTime(); while (time < curTick) time += lat; @@ -516,21 +563,27 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; - if (getState() == SimObject::Draining) { - completeDrain(); + numCycles += curTick - previousTick; + previousTick = curTick; - delete pkt->req; - delete pkt; + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); - 
return; + if (pkt->isRead() && pkt->req->isLocked()) { + TheISA::handleLockedRead(thread, pkt->req); } - Fault fault = curStaticInst->completeAcc(pkt, this, traceData); - delete pkt->req; delete pkt; postExecute(); + + if (getState() == SimObject::Draining) { + advancePC(fault); + completeDrain(); + + return; + } + advanceInst(fault); } @@ -546,6 +599,7 @@ TimingSimpleCPU::completeDrain() bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) { + // delay processing of returned data until next CPU clock edge Tick time = pkt->req->getTime(); while (time < curTick) time += lat; @@ -574,6 +628,7 @@ TimingSimpleCPU::DcachePort::recvRetry() Packet *tmp = cpu->dcache_pkt; if (sendTiming(tmp)) { cpu->_status = DcacheWaitResponse; + // memory system takes ownership of packet cpu->dcache_pkt = NULL; } } @@ -592,11 +647,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Tick> progress_interval; SimObjectParam<MemObject *> mem; SimObjectParam<System *> system; + Param<int> cpu_id; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - Param<int> cpu_id; Param<Tick> profile; #else SimObjectParam<Process *> workload; @@ -625,11 +680,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(progress_interval, "Progress interval"), INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "processes to run"), @@ -661,11 +716,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTraceStart = function_trace_start; params->mem = mem; params->system = system; + params->cpu_id = cpu_id; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->cpu_id = cpu_id; params->profile = profile; #else params->process = workload; diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index d03fa4bc0..988ddeded 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -92,7 +92,7 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } struct TickEvent : public Event { @@ -166,6 +166,9 @@ class TimingSimpleCPU : public BaseSimpleCPU Packet *ifetch_pkt; Packet *dcache_pkt; + int cpu_id; + Tick previousTick; + public: virtual Port *getPort(const std::string &if_name, int idx = -1); diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 242cfd0e1..6fa6500bd 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -237,7 +237,7 @@ class SimpleThread : public ThreadState Fault read(RequestPtr &req, T &data) { #if FULL_SYSTEM && THE_ISA == ALPHA_ISA - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } @@ -256,10 +256,10 @@ class SimpleThread : public ThreadState ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? 
[RGD] diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index e8d7f4817..8007fda5e 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -742,7 +742,6 @@ IdeController::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(bm_enabled); UNSERIALIZE_ARRAY(cmd_in_progress, sizeof(cmd_in_progress) / sizeof(cmd_in_progress[0])); - pioPort->sendStatusChange(Port::RangeChange); } #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index c3b83f448..b16ddb31a 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -302,6 +302,8 @@ PciDev::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_ARRAY(BARAddrs, sizeof(BARAddrs) / sizeof(BARAddrs[0])); UNSERIALIZE_ARRAY(config.data, sizeof(config.data) / sizeof(config.data[0])); + pioPort->sendStatusChange(Port::RangeChange); + } #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/kern/tru64/tru64.hh b/src/kern/tru64/tru64.hh index 97a5e2a7c..2ee4014db 100644 --- a/src/kern/tru64/tru64.hh +++ b/src/kern/tru64/tru64.hh @@ -532,16 +532,26 @@ class Tru64 : public OperatingSystem argp.copyIn(tc->getMemPort()); + int stack_size = + gtoh(argp->rsize) + gtoh(argp->ysize) + gtoh(argp->gsize); + // if the user chose an address, just let them have it. Otherwise // pick one for them. - if (htog(argp->address) == 0) { - argp->address = htog(process->next_thread_stack_base); - int stack_size = (htog(argp->rsize) + htog(argp->ysize) + - htog(argp->gsize)); + Addr stack_base = gtoh(argp->address); + + if (stack_base == 0) { + stack_base = process->next_thread_stack_base; process->next_thread_stack_base -= stack_size; - argp.copyOut(tc->getMemPort()); } + stack_base = roundDown(stack_base, VMPageSize); + + // map memory + process->pTable->allocate(stack_base, roundUp(stack_size, VMPageSize)); + + argp->address = gtoh(stack_base); + argp.copyOut(tc->getMemPort()); + return 0; } @@ -577,7 +587,7 @@ class Tru64 : public OperatingSystem abort(); } - const Addr base_addr = 0x12000; // was 0x3f0000000LL; + Addr base_addr = 0x12000; // was 0x3f0000000LL; Addr cur_addr = base_addr; // next addresses to use // first comes the config_info struct Addr config_addr = cur_addr; @@ -603,8 +613,6 @@ class Tru64 : public OperatingSystem config->nxm_slot_state = htog(slot_state_addr); config->nxm_rad[0] = htog(rad_state_addr); - config.copyOut(tc->getMemPort()); - // initialize the slot_state array and copy it out TypedBufferArg<Tru64::nxm_slot_state_t> slot_state(slot_state_addr, slot_state_size); @@ -616,8 +624,6 @@ class Tru64 : public OperatingSystem (i == 0) ? Tru64::NXM_SLOT_BOUND : Tru64::NXM_SLOT_AVAIL; } - slot_state.copyOut(tc->getMemPort()); - // same for the per-RAD "shared" struct. Note that we need to // allocate extra bytes for the per-VP array which is embedded at // the end. 
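// The stack mapping above relies on the usual power-of-two page rounding.
// A worked example, assuming the Alpha 8KB page size (VMPageSize = 0x2000):
//   roundDown(a, p) = a & ~(p - 1)             roundDown(0x12345, 0x2000) = 0x12000
//   roundUp(a, p)   = (a + p - 1) & ~(p - 1)   roundUp(0x4100, 0x2000)    = 0x6000
// so pTable->allocate(stack_base, roundUp(stack_size, VMPageSize)) maps
// every page the region [stack_base, stack_base + stack_size) touches.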
@@ -650,17 +656,20 @@ class Tru64 : public OperatingSystem } } - rad_state.copyOut(tc->getMemPort()); - // // copy pointer to shared config area out to user // *configptr_ptr = htog(config_addr); - configptr_ptr.copyOut(tc->getMemPort()); // Register this as a valid address range with the process - process->nxm_start = base_addr; - process->nxm_end = cur_addr; + base_addr = roundDown(base_addr, VMPageSize); + int size = cur_addr - base_addr; + process->pTable->allocate(base_addr, roundUp(size, VMPageSize)); + + config.copyOut(tc->getMemPort()); + slot_state.copyOut(tc->getMemPort()); + rad_state.copyOut(tc->getMemPort()); + configptr_ptr.copyOut(tc->getMemPort()); return 0; } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index cf9e54e62..75ffed0d2 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -61,12 +61,79 @@ Bus::getPort(const std::string &if_name, int idx) void Bus::init() { - std::vector<Port*>::iterator intIter; + std::vector<BusPort*>::iterator intIter; for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++) (*intIter)->sendStatusChange(Port::RangeChange); } +Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) : Event(&mainEventQueue), bus(_bus) +{} + +void Bus::BusFreeEvent::process() +{ + bus->recvRetry(-1); +} + +const char * Bus::BusFreeEvent::description() +{ + return "bus became available"; +} + +void Bus::occupyBus(PacketPtr pkt) +{ + //Bring tickNextIdle up to the present tick + //There is some potential ambiguity where a cycle starts, which might make + //a difference when devices are acting right around a cycle boundary. Using + //a < allows things which happen exactly on a cycle boundary to take up only + //the following cycle. Anything that happens later will have to "wait" for + //the end of that cycle, and then start using the bus after that. + while (tickNextIdle < curTick) + tickNextIdle += clock; + + // The packet will be sent. Figure out how long it occupies the bus, and + // how much of that time is for the first "word", aka bus width. + int numCycles = 0; + // Requests need one cycle to send an address + if (pkt->isRequest()) + numCycles++; + else if (pkt->isResponse() || pkt->hasData()) { + // If a packet has data, it needs ceil(size/width) cycles to send it + // We're using the "adding instead of dividing" trick again here + if (pkt->hasData()) { + int dataSize = pkt->getSize(); + for (int transmitted = 0; transmitted < dataSize; + transmitted += width) { + numCycles++; + } + } else { + // If the packet didn't have data, it must have been a response. + // Those use the bus for one cycle to send their data. + numCycles++; + } + } + + // The first word will be delivered after the current tick, the delivery + // of the address if any, and one bus cycle to deliver the data + pkt->firstWordTime = + tickNextIdle + + (pkt->isRequest() ? clock : 0) + + clock; + + //Advance it numCycles bus cycles. + //XXX Should this use the repeated addition trick as well? + tickNextIdle += (numCycles * clock); + if (!busIdle.scheduled()) { + busIdle.schedule(tickNextIdle); + } else { + busIdle.reschedule(tickNextIdle); + } + DPRINTF(Bus, "The bus is now occupied from tick %d to %d\n", + curTick, tickNextIdle); + + // The bus will become idle once the current packet is delivered.
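// The cycle count occupyBus() computes above, rewritten with an explicit
// ceiling division in place of the repeated-addition loop (equivalent,
// assuming width is the bus width in bytes; busCycles is an illustrative
// name, not a Bus method):
int
busCycles(Packet *pkt, int width)
{
    if (pkt->isRequest())
        return 1;                                    // address cycle only
    if (pkt->hasData())
        return (pkt->getSize() + width - 1) / width; // ceil(size / width)
    if (pkt->isResponse())
        return 1;                                    // dataless response
    return 0;
}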
+ pkt->finishTime = tickNextIdle; +} /** Function called by the port when the bus is receiving a Timing * transaction.*/ @@ -77,17 +144,40 @@ Bus::recvTiming(Packet *pkt) DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); + BusPort *pktPort; + if (pkt->getSrc() == defaultId) + pktPort = defaultPort; + else pktPort = interfaces[pkt->getSrc()]; + + // If the bus is busy, or other devices are in line ahead of the current + // one, put this device on the retry list. + if (tickNextIdle > curTick || + (retryList.size() && (!inRetry || pktPort != retryList.front()))) { + addToRetryList(pktPort); + return false; + } + short dest = pkt->getDest(); if (dest == Packet::Broadcast) { - if ( timingSnoopPhase1(pkt) ) - { - timingSnoopPhase2(pkt); + if (timingSnoop(pkt)) { + pkt->flags |= SNOOP_COMMIT; + bool success = timingSnoop(pkt); + assert(success); + if (pkt->flags & SATISFIED) { + //Cache-Cache transfer occuring + if (inRetry) { + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + } + occupyBus(pkt); + return true; + } port = findPort(pkt->getAddr(), pkt->getSrc()); - } - else - { + } else { //Snoop didn't succeed - retryList.push_back(interfaces[pkt->getSrc()]); + DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); + addToRetryList(pktPort); return false; } } else { @@ -95,35 +185,60 @@ Bus::recvTiming(Packet *pkt) assert(dest != pkt->getSrc()); // catch infinite loops port = interfaces[dest]; } + + occupyBus(pkt); + if (port->sendTiming(pkt)) { - // packet was successfully sent, just return true. + // Packet was successfully sent. Return true. + // Also take care of retries + if (inRetry) { + DPRINTF(Bus, "Remove retry from list %i\n", retryList.front()); + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + } return true; } - // packet not successfully sent - retryList.push_back(interfaces[pkt->getSrc()]); + // Packet not successfully sent. Leave or put it on the retry list. + DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); + addToRetryList(pktPort); return false; } void Bus::recvRetry(int id) { - // Go through all the elements on the list calling sendRetry on each - // This is not very efficient at all but it works. Ultimately we should end - // up with something that is more intelligent. - int initialSize = retryList.size(); - int i; - Port *p; - - for (i = 0; i < initialSize; i++) { - assert(retryList.size() > 0); - p = retryList.front(); - retryList.pop_front(); - p->sendRetry(); + DPRINTF(Bus, "Received a retry\n"); + // If there's anything waiting, and the bus isn't busy... + if (retryList.size() && curTick >= tickNextIdle) { + //retryingPort = retryList.front(); + inRetry = true; + DPRINTF(Bus, "Sending a retry\n"); + retryList.front()->sendRetry(); + // If inRetry is still true, sendTiming wasn't called + if (inRetry) + { + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + + //Bring tickNextIdle up to the present + while (tickNextIdle < curTick) + tickNextIdle += clock; + + //Burn a cycle for the missed grant. 
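// recvTiming() above snoops a broadcast twice: a trial pass that may fail,
// then a committing pass that must not. Condensed (the wrapper name is
// illustrative, not part of the patch):
bool
trialThenCommitSnoop(Bus *bus, Packet *pkt)
{
    if (!bus->timingSnoop(pkt))     // pass 1: can every snooper take it?
        return false;               // no -> sender goes on the retry list
    pkt->flags |= SNOOP_COMMIT;     // pass 2: snoopers update their state
    bool success = bus->timingSnoop(pkt);
    assert(success);                // the commit pass may never fail
    return true;
}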
+ tickNextIdle += clock; + + if (!busIdle.scheduled()) { + busIdle.schedule(tickNextIdle); + } else { + busIdle.reschedule(tickNextIdle); + } + } } } - Port * Bus::findPort(Addr addr, int id) { @@ -174,64 +289,60 @@ Bus::findSnoopPorts(Addr addr, int id) //Careful to not overlap ranges //or snoop will be called more than once on the port ports.push_back(portSnoopList[i].portId); - DPRINTF(Bus, " found snoop addr %#llx on device%d\n", addr, - portSnoopList[i].portId); +// DPRINTF(Bus, " found snoop addr %#llx on device%d\n", addr, +// portSnoopList[i].portId); } i++; } return ports; } -void +Tick Bus::atomicSnoop(Packet *pkt) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); + Tick response_time = 0; while (!ports.empty()) { - interfaces[ports.back()]->sendAtomic(pkt); + Tick response = interfaces[ports.back()]->sendAtomic(pkt); + if (response) { + assert(!response_time); //Multiple responders + response_time = response; + } ports.pop_back(); } + return response_time; } -bool -Bus::timingSnoopPhase1(Packet *pkt) +void +Bus::functionalSnoop(Packet *pkt) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); - bool success = true; - while (!ports.empty() && success) + while (!ports.empty()) { - snoopCallbacks.push_back(ports.back()); - success = interfaces[ports.back()]->sendTiming(pkt); + interfaces[ports.back()]->sendFunctional(pkt); ports.pop_back(); } - if (!success) - { - while (!snoopCallbacks.empty()) - { - interfaces[snoopCallbacks.back()]->sendStatusChange(Port::SnoopSquash); - snoopCallbacks.pop_back(); - } - return false; - } - return true; } -void -Bus::timingSnoopPhase2(Packet *pkt) +bool +Bus::timingSnoop(Packet *pkt) { - bool success; - pkt->flags |= SNOOP_COMMIT; - while (!snoopCallbacks.empty()) + std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); + bool success = true; + + while (!ports.empty() && success) { - success = interfaces[snoopCallbacks.back()]->sendTiming(pkt); - //We should not fail on snoop callbacks - assert(success); - snoopCallbacks.pop_back(); + success = interfaces[ports.back()]->sendTiming(pkt); + ports.pop_back(); } + + return success; } + /** Function called by the port when the bus is receiving a Atomic * transaction.*/ Tick @@ -240,8 +351,11 @@ Bus::recvAtomic(Packet *pkt) DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - atomicSnoop(pkt); - return findPort(pkt->getAddr(), pkt->getSrc())->sendAtomic(pkt); + Tick snoopTime = atomicSnoop(pkt); + if (snoopTime) + return snoopTime; //Snoop satisfies it + else + return findPort(pkt->getAddr(), pkt->getSrc())->sendAtomic(pkt); } /** Function called by the port when the bus is receiving a Functional @@ -252,6 +366,7 @@ Bus::recvFunctional(Packet *pkt) DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); + functionalSnoop(pkt); findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); } @@ -280,7 +395,7 @@ Bus::recvStatusChange(Port::Status status, int id) } } else { - assert((id < interfaces.size() && id >= 0) || id == -1); + assert((id < interfaces.size() && id >= 0) || id == defaultId); Port *port = interfaces[id]; std::vector<DevMap>::iterator portIter; std::vector<DevMap>::iterator snoopIter; @@ -380,16 +495,20 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int 
id) BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bus) Param<int> bus_id; + Param<int> clock; + Param<int> width; END_DECLARE_SIM_OBJECT_PARAMS(Bus) BEGIN_INIT_SIM_OBJECT_PARAMS(Bus) - INIT_PARAM(bus_id, "a globally unique bus id") + INIT_PARAM(bus_id, "a globally unique bus id"), + INIT_PARAM(clock, "bus clock speed"), + INIT_PARAM(width, "width of the bus (bits)") END_INIT_SIM_OBJECT_PARAMS(Bus) CREATE_SIM_OBJECT(Bus) { - return new Bus(getInstanceName(), bus_id); + return new Bus(getInstanceName(), bus_id, clock, width); } REGISTER_SIM_OBJECT("Bus", Bus) diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 941389296..509b8cf9b 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -46,13 +46,20 @@ #include "mem/packet.hh" #include "mem/port.hh" #include "mem/request.hh" +#include "sim/eventq.hh" class Bus : public MemObject { /** a globally unique id for this bus. */ int busId; + /** the clock speed for the bus */ + int clock; + /** the width of the bus in bytes */ + int width; + /** the next tick at which the bus will be idle */ + Tick tickNextIdle; - static const int defaultId = -1; + static const int defaultId = -3; //Make it unique from Broadcast struct DevMap { int portId; @@ -62,9 +69,6 @@ class Bus : public MemObject AddrRangeList defaultRange; std::vector<DevMap> portSnoopList; - std::vector<int> snoopCallbacks; - - /** Function called by the port when the bus is recieving a Timing transaction.*/ bool recvTiming(Packet *pkt); @@ -103,18 +107,16 @@ class Bus : public MemObject std::vector<int> findSnoopPorts(Addr addr, int id); /** Snoop all relevant ports atomicly. */ - void atomicSnoop(Packet *pkt); + Tick atomicSnoop(Packet *pkt); - /** Snoop for NACK and Blocked in phase 1 - * @return True if succeds. - */ - bool timingSnoopPhase1(Packet *pkt); + /** Snoop all relevant ports functionally. */ + void functionalSnoop(Packet *pkt); - /** @todo Don't need to commit all snoops just those that need it - *(register somehow). */ - /** Commit all snoops now that we know if any of them would have blocked. + /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want + * the snoop to happen + * @return True if succeds. */ - void timingSnoopPhase2(Packet *pkt); + bool timingSnoop(Packet *pkt); /** Process address range request. * @param resp addresses that we can respond to @@ -123,11 +125,15 @@ class Bus : public MemObject */ void addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id); + /** Occupy the bus with transmitting the packet pkt */ + void occupyBus(PacketPtr pkt); /** Declaration of the buses port type, one will be instantiated for each of the interfaces connecting to the bus. */ class BusPort : public Port { + bool _onRetryList; + /** A pointer to the bus to which this port belongs. 
*/ Bus *bus; @@ -138,9 +144,15 @@ class Bus : public MemObject /** Constructor for the BusPort.*/ BusPort(const std::string &_name, Bus *_bus, int _id) - : Port(_name), bus(_bus), id(_id) + : Port(_name), _onRetryList(false), bus(_bus), id(_id) { } + bool onRetryList() + { return _onRetryList; } + + void onRetryList(bool newVal) + { _onRetryList = newVal; } + protected: /** When reciving a timing request from the peer port (at id), @@ -181,16 +193,52 @@ class Bus : public MemObject }; + class BusFreeEvent : public Event + { + Bus * bus; + + public: + BusFreeEvent(Bus * _bus); + void process(); + const char *description(); + }; + + BusFreeEvent busIdle; + + bool inRetry; + /** An array of pointers to the peer port interfaces connected to this bus.*/ - std::vector<Port*> interfaces; + std::vector<BusPort*> interfaces; /** An array of pointers to ports that retry should be called on because the * original send failed for whatever reason.*/ - std::list<Port*> retryList; + std::list<BusPort*> retryList; + + void addToRetryList(BusPort * port) + { + if (!inRetry) { + // The device wasn't retrying a packet, or wasn't at an appropriate + // time. + assert(!port->onRetryList()); + port->onRetryList(true); + retryList.push_back(port); + } else { + if (port->onRetryList()) { + // The device was retrying a packet. It didn't work, so we'll leave + // it at the head of the retry list. + assert(port == retryList.front()); + inRetry = false; + } + else { + port->onRetryList(true); + retryList.push_back(port); + } + } + } /** Port that handles requests that don't match any of the interfaces.*/ - Port *defaultPort; + BusPort *defaultPort; public: @@ -199,8 +247,16 @@ class Bus : public MemObject virtual void init(); - Bus(const std::string &n, int bus_id) - : MemObject(n), busId(bus_id), defaultPort(NULL) {} + Bus(const std::string &n, int bus_id, int _clock, int _width) + : MemObject(n), busId(bus_id), clock(_clock), width(_width), + tickNextIdle(0), busIdle(this), inRetry(false), defaultPort(NULL) + { + //Both the width and clock period must be positive + if (width <= 0) + fatal("Bus width must be positive\n"); + if (clock <= 0) + fatal("Bus clock period must be positive\n"); + } }; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index a172847df..71ea58416 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -44,6 +44,8 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, : Port(_name), cache(_cache), isCpuSide(_isCpuSide) { blocked = false; + cshrRetry = NULL; + waitingOnRetry = false; //Start ports at null if more than one is created we should panic //cpuSidePort = NULL; //memSidePort = NULL; @@ -71,7 +73,23 @@ BaseCache::CachePort::deviceBlockSize() bool BaseCache::CachePort::recvTiming(Packet *pkt) { - if (blocked) + if (isCpuSide + && !pkt->req->isUncacheable() + && pkt->isInvalidate() + && !pkt->isRead() && !pkt->isWrite()) { + //Upgrade or Invalidate + //Look into what happens if two slave caches on bus + DPRINTF(Cache, "%s %x ? 
blk_addr: %x\n", pkt->cmdString(), + pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)cache->blkSize - 1)); + + assert(!(pkt->flags & SATISFIED)); + pkt->flags |= SATISFIED; + //Invalidates/Upgrades need no response if they get the bus + return true; + } + + if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; @@ -96,16 +114,44 @@ void BaseCache::CachePort::recvRetry() { Packet *pkt; - - if (!isCpuSide) + assert(waitingOnRetry); + if (!drainList.empty()) { + DPRINTF(CachePort, "%s attempting to send a retry for response\n", name()); + //We have some responses to drain first + if (sendTiming(drainList.front())) { + DPRINTF(CachePort, "%s sucessful in sending a retry for response\n", name()); + drainList.pop_front(); + if (!drainList.empty() || + !isCpuSide && cache->doMasterRequest() || + isCpuSide && cache->doSlaveRequest()) { + + DPRINTF(CachePort, "%s has more responses/requests\n", name()); + BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); + reqCpu->schedule(curTick + 1); + } + waitingOnRetry = false; + } + } + else if (!isCpuSide) { + DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name()); + if (!cache->doMasterRequest()) { + //This can happen if I am the owner of a block and see an upgrade + //while the block was in my WB Buffers. I just remove the + //wb and de-assert the masterRequest + waitingOnRetry = false; + return; + } pkt = cache->getPacket(); + MSHR* mshr = (MSHR*)pkt->senderState; bool success = sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); - cache->sendResult(pkt, success); + cache->sendResult(pkt, mshr, success); + waitingOnRetry = !success; if (success && cache->doMasterRequest()) { + DPRINTF(CachePort, "%s has more requests\n", name()); //Still more to issue, rerequest in 1 cycle pkt = NULL; BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); @@ -114,17 +160,23 @@ BaseCache::CachePort::recvRetry() } else { - pkt = cache->getCoherencePacket(); + assert(cshrRetry); + //pkt = cache->getCoherencePacket(); + //We save the packet, no reordering on CSHRS + pkt = cshrRetry; bool success = sendTiming(pkt); + waitingOnRetry = !success; if (success && cache->doSlaveRequest()) { //Still more to issue, rerequest in 1 cycle pkt = NULL; BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); reqCpu->schedule(curTick + 1); + cshrRetry = NULL; } - } + if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name()); + else DPRINTF(CachePort, "%s no longer waiting on retry\n", name()); return; } void @@ -169,16 +221,47 @@ BaseCache::CacheEvent::process() { if (!pkt) { - if (!cachePort->isCpuSide) - { - //MSHR + if (cachePort->waitingOnRetry) return; + //We have some responses to drain first + if (!cachePort->drainList.empty()) { + DPRINTF(CachePort, "%s trying to drain a response\n", cachePort->name()); + if (cachePort->sendTiming(cachePort->drainList.front())) { + DPRINTF(CachePort, "%s drains a response succesfully\n", cachePort->name()); + cachePort->drainList.pop_front(); + if (!cachePort->drainList.empty() || + !cachePort->isCpuSide && cachePort->cache->doMasterRequest() || + cachePort->isCpuSide && cachePort->cache->doSlaveRequest()) { + + DPRINTF(CachePort, "%s still has outstanding bus reqs\n", cachePort->name()); + this->schedule(curTick + 1); + } + } + else { + cachePort->waitingOnRetry = true; + DPRINTF(CachePort, "%s now waiting on a retry\n", 
cachePort->name()); + } + } + else if (!cachePort->isCpuSide) + { //MSHR + DPRINTF(CachePort, "%s trying to send a MSHR request\n", cachePort->name()); + if (!cachePort->cache->doMasterRequest()) { + //This can happen if I am the owner of a block and see an upgrade + //while the block was in my WB Buffers. I just remove the + //wb and de-assert the masterRequest + return; + } + pkt = cachePort->cache->getPacket(); + MSHR* mshr = (MSHR*) pkt->senderState; bool success = cachePort->sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); - cachePort->cache->sendResult(pkt, success); + cachePort->cache->sendResult(pkt, mshr, success); + cachePort->waitingOnRetry = !success; + if (cachePort->waitingOnRetry) DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name()); if (success && cachePort->cache->doMasterRequest()) { + DPRINTF(CachePort, "%s still more MSHR requests to send\n", cachePort->name()); //Still more to issue, rerequest in 1 cycle pkt = NULL; this->schedule(curTick+1); @@ -186,10 +269,16 @@ BaseCache::CacheEvent::process() } else { + assert(cachePort->cache->doSlaveRequest()); //CSHR pkt = cachePort->cache->getCoherencePacket(); bool success = cachePort->sendTiming(pkt); - if (success && cachePort->cache->doSlaveRequest()) + if (!success) { + //Need to send on a retry + cachePort->cshrRetry = pkt; + cachePort->waitingOnRetry = true; + } + else if (cachePort->cache->doSlaveRequest()) { //Still more to issue, rerequest in 1 cycle pkt = NULL; @@ -199,8 +288,24 @@ BaseCache::CacheEvent::process() return; } //Response - //Know the packet to send, no need to mark in service (must succed) - assert(cachePort->sendTiming(pkt)); + //Know the packet to send + if (pkt->flags & NACKED_LINE) + pkt->result = Packet::Nacked; + else + pkt->result = Packet::Success; + pkt->makeTimingResponse(); + DPRINTF(CachePort, "%s attempting to send a response\n", cachePort->name()); + if (!cachePort->drainList.empty() || cachePort->waitingOnRetry) { + //Already have a list, just append + cachePort->drainList.push_back(pkt); + DPRINTF(CachePort, "%s appending response onto drain list\n", cachePort->name()); + } + else if (!cachePort->sendTiming(pkt)) { + //It failed, save it to list of drain events + DPRINTF(CachePort, "%s now waiting for a retry\n", cachePort->name()); + cachePort->drainList.push_back(pkt); + cachePort->waitingOnRetry = true; + } } const char * diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 069dbab58..563b1ca8b 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -72,6 +72,7 @@ enum RequestCause{ Request_PF }; +class MSHR; /** * A basic cache interface. Implements some common functions for speed. 
*/ @@ -110,6 +111,12 @@ class BaseCache : public MemObject bool mustSendRetry; bool isCpuSide; + + bool waitingOnRetry; + + std::list<Packet *> drainList; + + Packet *cshrRetry; }; struct CacheEvent : public Event @@ -127,6 +134,8 @@ class BaseCache : public MemObject CachePort *cpuSidePort; CachePort *memSidePort; + bool snoopRangesSent; + public: virtual Port *getPort(const std::string &if_name, int idx = -1); @@ -149,14 +158,15 @@ class BaseCache : public MemObject void recvStatusChange(Port::Status status, bool isCpuSide) { - if (status == Port::RangeChange) - { - if (!isCpuSide) - { + if (status == Port::RangeChange){ + if (!isCpuSide) { cpuSidePort->sendStatusChange(Port::RangeChange); + if (!snoopRangesSent) { + snoopRangesSent = true; + memSidePort->sendStatusChange(Port::RangeChange); + } } - else - { + else { memSidePort->sendStatusChange(Port::RangeChange); } } @@ -172,7 +182,7 @@ class BaseCache : public MemObject fatal("No implementation"); } - virtual void sendResult(Packet* &pkt, bool success) + virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success) { fatal("No implementation"); @@ -205,6 +215,7 @@ class BaseCache : public MemObject /** True if this cache is connected to the CPU. */ bool topLevelCache; + /** Stores time the cache blocked for statistics. */ Tick blockedCycle; @@ -332,6 +343,7 @@ class BaseCache : public MemObject //Start ports at null if more than one is created we should panic cpuSidePort = NULL; memSidePort = NULL; + snoopRangesSent = false; } virtual void init(); @@ -382,9 +394,14 @@ class BaseCache : public MemObject blocked_causes[cause]++; blockedCycle = curTick; } - blocked |= flag; - DPRINTF(Cache,"Blocking for cause %s\n", cause); - cpuSidePort->setBlocked(); + int old_state = blocked; + if (!(blocked & flag)) { + //Wasn't already blocked for this cause + blocked |= flag; + DPRINTF(Cache,"Blocking for cause %s\n", cause); + if (!old_state) + cpuSidePort->setBlocked(); + } } /** @@ -395,8 +412,13 @@ class BaseCache : public MemObject void setBlockedForSnoop(BlockedCause cause) { uint8_t flag = 1 << cause; - blockedSnoop |= flag; - memSidePort->setBlocked(); + uint8_t old_state = blockedSnoop; + if (!(blockedSnoop & flag)) { + //Wasn't already blocked for this cause + blockedSnoop |= flag; + if (!old_state) + memSidePort->setBlocked(); + } } /** @@ -445,7 +467,7 @@ class BaseCache : public MemObject */ void setMasterRequest(RequestCause cause, Tick time) { - if (!doMasterRequest()) + if (!doMasterRequest() && !memSidePort->waitingOnRetry) { BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort); reqCpu->schedule(time); @@ -503,10 +525,14 @@ class BaseCache : public MemObject */ void respond(Packet *pkt, Tick time) { - pkt->makeTimingResponse(); - pkt->result = Packet::Success; - CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); - reqCpu->schedule(time); + if (pkt->needsResponse()) { + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); + } + else { + if (pkt->cmd == Packet::Writeback) delete pkt->req; + delete pkt; + } } /** @@ -517,22 +543,29 @@ class BaseCache : public MemObject void respondToMiss(Packet *pkt, Tick time) { if (!pkt->req->isUncacheable()) { - missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; + missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time; + } + if (pkt->needsResponse()) { + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); + } + else { + if (pkt->cmd == Packet::Writeback) delete 
pkt->req; + delete pkt; } - pkt->makeTimingResponse(); - pkt->result = Packet::Success; - CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); - reqCpu->schedule(time); } /** * Suppliess the data if cache to cache transfers are enabled. * @param pkt The bus transaction to fulfill. */ - void respondToSnoop(Packet *pkt) + void respondToSnoop(Packet *pkt, Tick time) { - assert("Implement\n" && 0); +// assert("Implement\n" && 0); // mi->respond(pkt,curTick + hitLatency); + assert (pkt->needsResponse()); + CacheEvent *reqMem = new CacheEvent(memSidePort, pkt); + reqMem->schedule(time); } /** @@ -551,6 +584,16 @@ class BaseCache : public MemObject else { //This is where snoops get updated + AddrRangeList dummy; +// if (!topLevelCache) +// { + cpuSidePort->getPeerAddressRanges(dummy, snoop); +// } +// else +// { +// snoop.push_back(RangeSize(0,-1)); +// } + return; } } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 989b8743e..41b270030 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -103,6 +103,7 @@ class Cache : public BaseCache * Used to append to target list, to cause an invalidation. */ Packet * invalidatePkt; + Request *invalidateReq; /** * Temporarily move a block into a MSHR. @@ -175,7 +176,7 @@ class Cache : public BaseCache * @param pkt The request. * @param success True if the request was sent successfully. */ - virtual void sendResult(Packet * &pkt, bool success); + virtual void sendResult(Packet * &pkt, MSHR* mshr, bool success); /** * Handles a response (cache line fill/write ack) from the bus. @@ -251,7 +252,7 @@ class Cache : public BaseCache * request. * @return The estimated completion time. */ - Tick probe(Packet * &pkt, bool update); + Tick probe(Packet * &pkt, bool update, CachePort * otherSidePort); /** * Snoop for the provided request in the cache and return the estimated @@ -262,7 +263,7 @@ class Cache : public BaseCache * request. * @return The estimated completion time. 
*/ - Tick snoopProbe(Packet * &pkt, bool update); + Tick snoopProbe(Packet * &pkt); }; #endif // __CACHE_HH__ diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 11cd84e88..a68418f24 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -51,7 +51,7 @@ #include "mem/cache/miss/mshr.hh" #include "mem/cache/prefetch/prefetcher.hh" -#include "sim/sim_events.hh" // for SimExitEvent +#include "sim/sim_exit.hh" // for SimExitEvent template<class TagStore, class Buffering, class Coherence> bool @@ -60,17 +60,21 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { - if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { + if (pkt->isWrite() && (pkt->req->isLocked())) { pkt->req->setScResult(1); } access(pkt); + } else { if (pkt->isResponse()) handleResponse(pkt); - else - snoop(pkt); + else { + //Check if we should do the snoop + if (pkt->flags & SNOOP_COMMIT) + snoop(pkt); + } } return true; } @@ -83,11 +87,11 @@ doAtomicAccess(Packet *pkt, bool isCpuSide) if (isCpuSide) { //Temporary solution to LL/SC - if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { + if (pkt->isWrite() && (pkt->req->isLocked())) { pkt->req->setScResult(1); } - probe(pkt, true); + probe(pkt, true, NULL); //TEMP ALWAYS SUCCES FOR NOW pkt->result = Packet::Success; } @@ -96,7 +100,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide) if (pkt->isResponse()) handleResponse(pkt); else - snoopProbe(pkt, true); + return snoopProbe(pkt); } //Fix this timing info return hitLatency; @@ -113,20 +117,17 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide) pkt->req->setThreadContext(0,0); //Temporary solution to LL/SC - if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { + if (pkt->isWrite() && (pkt->req->isLocked())) { assert("Can't handle LL/SC on functional path\n"); } - probe(pkt, true); + probe(pkt, false, memSidePort); //TEMP ALWAYS SUCCESFUL FOR NOW pkt->result = Packet::Success; } else { - if (pkt->isResponse()) - handleResponse(pkt); - else - snoopProbe(pkt, true); + probe(pkt, false, cpuSidePort); } } @@ -147,7 +148,8 @@ Cache(const std::string &_name, prefetchAccess(params.prefetchAccess), tags(params.tags), missQueue(params.missQueue), coherence(params.coherence), prefetcher(params.prefetcher), - doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) + doCopy(params.doCopy), blockOnCopy(params.blockOnCopy), + hitLatency(params.hitLatency) { //FIX BUS POINTERS // if (params.in == NULL) { @@ -162,10 +164,8 @@ Cache(const std::string &_name, prefetcher->setCache(this); prefetcher->setTags(tags); prefetcher->setBuffer(missQueue); -#if 0 - invalidatePkt = new Packet; - invalidatePkt->cmd = Packet::InvalidateReq; -#endif + invalidateReq = new Request((Addr) NULL, blkSize, 0); + invalidatePkt = new Packet(invalidateReq, Packet::InvalidateReq, 0); } template<class TagStore, class Buffering, class Coherence> @@ -194,20 +194,6 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) prefetcher->handleMiss(pkt, curTick); } if (!pkt->req->isUncacheable()) { - if (pkt->isInvalidate() && !pkt->isRead() - && !pkt->isWrite()) { - //Upgrade or Invalidate - //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), - pkt->getAddr() & ~((Addr)blkSize - 1)); - - //@todo Should this return latency have the hit latency in it? 
-// respond(pkt,curTick+lat); - pkt->flags |= SATISFIED; -// return MA_HIT; //@todo, return values - return true; - } blk = tags->handleAccess(pkt, lat, writebacks); } else { size = pkt->getSize(); @@ -234,27 +220,30 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); } - DPRINTF(Cache, "%s %x %s blk_addr: %x pc %x\n", pkt->cmdString(), + DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC()); + pkt->getAddr() & ~((Addr)blkSize - 1)); if (blk) { // Hit - hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; // clear dirty bit if write through if (pkt->needsResponse()) respond(pkt, curTick+lat); -// return MA_HIT; + if (pkt->cmd == Packet::Writeback) { + //Signal that you can kill the pkt/req + pkt->flags |= SATISFIED; + } return true; } // Miss if (!pkt->req->isUncacheable()) { - misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; /** @todo Move miss count code into BaseCache */ if (missCount) { --missCount; if (missCount == 0) - new SimLoopExitEvent(curTick, "A cache reached the maximum miss count"); + exitSimLoop("A cache reached the maximum miss count"); } } missQueue->handleMiss(pkt, size, curTick + hitLatency); @@ -267,10 +256,11 @@ template<class TagStore, class Buffering, class Coherence> Packet * Cache<TagStore,Buffering,Coherence>::getPacket() { + assert(missQueue->havePending()); Packet * pkt = missQueue->getPacket(); if (pkt) { if (!pkt->req->isUncacheable()) { - if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][pkt->req->getThreadNum()]++; + if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++; BlkType *blk = tags->findBlock(pkt); Packet::Command cmd = coherence->getBusCmd(pkt->cmd, (blk)? blk->status : 0); @@ -285,15 +275,30 @@ Cache<TagStore,Buffering,Coherence>::getPacket() template<class TagStore, class Buffering, class Coherence> void -Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success) +Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success) { - if (success) { - missQueue->markInService(pkt); - //Temp Hack for UPGRADES - if (pkt->cmd == Packet::UpgradeReq) { - handleResponse(pkt); - } + if (success && !(pkt->flags & NACKED_LINE)) { + missQueue->markInService(pkt, mshr); + //Temp Hack for UPGRADES + if (pkt->cmd == Packet::UpgradeReq) { + pkt->flags &= ~CACHE_LINE_FILL; + BlkType *blk = tags->findBlock(pkt); + CacheBlk::State old_state = (blk) ? 
blk->status : 0; + CacheBlk::State new_state = coherence->getNewState(pkt,old_state); + if (old_state != new_state) + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); + //Set the state on the upgrade + memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize); + PacketList writebacks; + tags->handleFill(blk, mshr, new_state, writebacks, pkt); + assert(writebacks.empty()); + missQueue->handleResponse(pkt, curTick + hitLatency); + } } else if (pkt && !pkt->req->isUncacheable()) { + pkt->flags &= ~NACKED_LINE; + pkt->flags &= ~SATISFIED; + pkt->flags &= ~SNOOP_COMMIT; missQueue->restoreOrigCmd(pkt); } } @@ -304,6 +309,14 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) { BlkType *blk = NULL; if (pkt->senderState) { + if (pkt->result == Packet::Nacked) { + //pkt->reinitFromRequest(); + warn("NACKs from devices not connected to the same bus not implemented\n"); + return; + } + if (pkt->result == Packet::BadAddress) { + //Make the response a Bad address and send it + } // MemDebug::cacheResponse(pkt); DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), pkt->getAddr() & (((ULL(1))<<48)-1)); @@ -312,11 +325,15 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) blk = tags->findBlock(pkt); CacheBlk::State old_state = (blk) ? blk->status : 0; PacketList writebacks; + CacheBlk::State new_state = coherence->getNewState(pkt,old_state); + if (old_state != new_state) + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); blk = tags->handleFill(blk, (MSHR*)pkt->senderState, - coherence->getNewState(pkt,old_state), - writebacks); + new_state, writebacks, pkt); while (!writebacks.empty()) { missQueue->doWriteback(writebacks.front()); + writebacks.pop_front(); } } missQueue->handleResponse(pkt, curTick + hitLatency); @@ -372,7 +389,6 @@ template<class TagStore, class Buffering, class Coherence> void Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) { - Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr); @@ -385,7 +401,12 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) //If the outstanding request was an invalidate (upgrade,readex,..) //Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) + assert(!(pkt->flags & SATISFIED)); + pkt->flags |= SATISFIED; pkt->flags |= NACKED_LINE; + ///@todo NACK's from other levels + //warn("NACKs from devices not connected to the same bus not implemented\n"); + //respondToSnoop(pkt, curTick + hitLatency); return; } else { @@ -398,6 +419,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. 
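// [Annotation, not part of the patch:] invalidatePkt is now built on top of
// a real Request; the constructor change earlier in this file preallocates
// the pair once:
//
//   invalidateReq = new Request((Addr) NULL, blkSize, 0);
//   invalidatePkt = new Packet(invalidateReq, Packet::InvalidateReq, 0);
//
// The addrOverride() call below then retargets that single packet at each
// snooped block rather than allocating a fresh Request/Packet per snoop.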
//Set the address so find match works + //panic("Don't have invalidates yet\n"); invalidatePkt->addrOverride(pkt->getAddr()); //Append the invalidate on @@ -420,6 +442,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) if (pkt->isRead()) { //Only Upgrades don't get here //Supply the data + assert(!(pkt->flags & SATISFIED)); pkt->flags |= SATISFIED; //If we are in an exclusive protocol, make it ask again @@ -427,18 +450,18 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) pkt->flags |= SHARED_LINE; assert(pkt->isRead()); - Addr offset = pkt->getAddr() & ~(blkSize - 1); + Addr offset = pkt->getAddr() & (blkSize - 1); assert(offset < blkSize); assert(pkt->getSize() <= blkSize); assert(offset + pkt->getSize() <=blkSize); memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize()); - respondToSnoop(pkt); + respondToSnoop(pkt, curTick + hitLatency); } if (pkt->isInvalidate()) { //This must be an upgrade or other cache will take ownership - missQueue->markInService(mshr->pkt); + missQueue->markInService(mshr->pkt, mshr); } return; } @@ -448,10 +471,16 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) CacheBlk::State new_state; bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { + DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data," + "new state is %i\n", + pkt->cmdString(), blk_addr, new_state); + tags->handleSnoop(blk, new_state, pkt); - respondToSnoop(pkt); + respondToSnoop(pkt, curTick + hitLatency); return; } + if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n", + pkt->cmdString(), blk_addr, new_state); tags->handleSnoop(blk, new_state); } @@ -486,7 +515,7 @@ Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr) */ template<class TagStore, class Buffering, class Coherence> Tick -Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) +Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort* otherSidePort) { // MemDebug::cacheProbe(pkt); if (!pkt->req->isUncacheable()) { @@ -505,6 +534,10 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); + DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), + pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->getAddr() & ~((Addr)blkSize - 1)); + if (!blk) { // Need to check for outstanding misses and writes Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); @@ -517,7 +550,8 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) missQueue->findWrites(blk_addr, writes); if (!update) { - memSidePort->sendFunctional(pkt); + otherSidePort->sendFunctional(pkt); + // Check for data in MSHR and writebuffer. if (mshr) { warn("Found outstanding miss on an non-update probe"); @@ -596,7 +630,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) // update the cache state and statistics if (mshr || !writes.empty()){ // Can't handle it, return pktuest unsatisfied. 
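// [Annotation, not part of the patch:] the atomic path has no way to stall
// and retry, so a probe that collides with an outstanding MSHR or write
// buffer entry can no longer quietly report "unsatisfied"; the change below
// promotes the silent return 0 into a panic so the conflict is caught loudly: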
- return 0; + panic("Atomic access ran into outstanding MSHR's or WB's!"); } if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill @@ -610,23 +644,46 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) busPkt->time = curTick; + DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n", + busPkt->cmdString(), + busPkt->getAddr() & (((ULL(1))<<48)-1), + busPkt->getAddr() & ~((Addr)blkSize - 1)); + lat = memSidePort->sendAtomic(busPkt); + //Be sure to flip the response to a request for coherence + if (busPkt->needsResponse()) { + busPkt->makeAtomicResponse(); + } + /* if (!(busPkt->flags & SATISFIED)) { // blocked at a higher level, just return return 0; } -*/ misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; +*/ misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; CacheBlk::State old_state = (blk) ? blk->status : 0; + CacheBlk::State new_state = coherence->getNewState(busPkt, old_state); + DPRINTF(Cache, "Receive response:%s for blk addr %x in state %i\n", + busPkt->cmdString(), + busPkt->getAddr() & (((ULL(1))<<48)-1), old_state); + if (old_state != new_state) + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + busPkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); + tags->handleFill(blk, busPkt, - coherence->getNewState(busPkt, old_state), + new_state, writebacks, pkt); + //Free the packet + delete busPkt; + // Handle writebacks if needed while (!writebacks.empty()){ - memSidePort->sendAtomic(writebacks.front()); + Packet *wbPkt = writebacks.front(); + memSidePort->sendAtomic(wbPkt); writebacks.pop_front(); + delete wbPkt; } return lat + hitLatency; } else { @@ -642,12 +699,12 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) } if (update) { - hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; } else if (pkt->isWrite()) { // Still need to change data in all locations. 
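// [Annotation, not part of the patch:] a non-update probe is the functional
// path, so a write must reach every copy of the line without consuming
// simulated time; hence the switch below from an atomic send on the
// hard-coded memSidePort to a functional send on whichever port faces away
// from the requester (the new otherSidePort argument):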
- return memSidePort->sendAtomic(pkt); + otherSidePort->sendFunctional(pkt); } - return curTick + lat; + return hitLatency; } fatal("Probe not handled.\n"); return 0; @@ -655,18 +712,24 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) template<class TagStore, class Buffering, class Coherence> Tick -Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt, bool update) +Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt) { - Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - BlkType *blk = tags->findBlock(pkt); - MSHR *mshr = missQueue->findMSHR(blk_addr); - CacheBlk::State new_state = 0; - bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); - if (satisfy) { - tags->handleSnoop(blk, new_state, pkt); - return hitLatency; - } - tags->handleSnoop(blk, new_state); - return 0; + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr); + CacheBlk::State new_state = 0; + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); + if (satisfy) { + DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data," + "new state is %i\n", + pkt->cmdString(), blk_addr, new_state); + + tags->handleSnoop(blk, new_state, pkt); + return hitLatency; + } + if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n", + pkt->cmdString(), blk_addr, new_state); + tags->handleSnoop(blk, new_state); + return 0; } diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index bcf3ce9c5..e28dda3dc 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -271,7 +271,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name, } Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; - Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp; + Packet::Command writeToSharedResp = doUpgrades ? 
Packet::UpgradeReq : Packet::ReadExResp; //@todo add in hardware prefetch to this list if (protocol == "msi") { diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index 67fc7ae56..f7aacff89 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -123,12 +123,12 @@ BlockingBuffer::restoreOrigCmd(Packet * &pkt) } void -BlockingBuffer::markInService(Packet * &pkt) +BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr) { if (!pkt->isCacheFill() && pkt->isWrite()) { // Forwarding a write/ writeback, don't need to change // the command - assert((MSHR*)pkt->senderState == &wb); + assert(mshr == &wb); cache->clearMasterRequest(Request_WB); if (!pkt->needsResponse()) { assert(wb.getNumTargets() == 0); @@ -138,7 +138,7 @@ BlockingBuffer::markInService(Packet * &pkt) wb.inService = true; } } else { - assert((MSHR*)pkt->senderState == &miss); + assert(mshr == &miss); cache->clearMasterRequest(Request_MSHR); if (!pkt->needsResponse()) { assert(miss.getNumTargets() == 0); @@ -189,7 +189,7 @@ BlockingBuffer::squash(int threadNum) if (miss.threadNum == threadNum) { Packet * target = miss.getTarget(); miss.popTarget(); - assert(target->req->getThreadNum() == threadNum); + assert(0/*target->req->getThreadNum()*/ == threadNum); target = NULL; assert(!miss.hasTargets()); miss.ntargets=0; @@ -218,7 +218,7 @@ BlockingBuffer::doWriteback(Addr addr, } ///All writebacks charged to same thread @todo figure this out - writebacks[pkt->req->getThreadNum()]++; + writebacks[0/*pkt->req->getThreadNum()*/]++; wb.allocateAsBuffer(pkt); cache->setMasterRequest(Request_WB, curTick); @@ -230,7 +230,7 @@ BlockingBuffer::doWriteback(Addr addr, void BlockingBuffer::doWriteback(Packet * &pkt) { - writebacks[pkt->req->getThreadNum()]++; + writebacks[0/*pkt->req->getThreadNum()*/]++; wb.allocateAsBuffer(pkt); diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 641d5a798..f7069696c 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -152,7 +152,7 @@ public: * are successfully sent. * @param pkt The request that was sent on the bus. */ - void markInService(Packet * &pkt); + void markInService(Packet * &pkt, MSHR* mshr); /** * Frees the resources of the pktuest and unblock the cache. 
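// [Annotation, not part of the patch:] throughout the miss-handling classes
// markInService() now takes the owning MSHR explicitly instead of casting it
// back out of the packet, roughly:
//
//   MSHR *mshr = (MSHR*)pkt->senderState;   // old: recovered from the packet
//   missQueue->markInService(pkt, mshr);    // new: supplied by the caller
//
// which avoids depending on senderState still being intact by the time the
// send result comes back.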
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 76fb25716..c7b0e0890 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) MSHR* MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) { - MSHR* mshr = wb.allocate(pkt,blkSize); + MSHR* mshr = wb.allocate(pkt,size); mshr->order = order++; //REMOVING COMPRESSION FOR NOW @@ -413,8 +413,8 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) mshr = mq.findMatch(blkAddr); if (mshr) { //@todo remove hw_pf here - mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - if (mshr->threadNum != pkt->req->getThreadNum()) { + mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { mshr->threadNum = -1; } mq.allocateTarget(mshr, pkt); @@ -434,11 +434,11 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) mshr_no_allocate_misses++; } else { - mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; } } else { //Count uncacheable accesses - mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + mshr_uncacheable[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; size = pkt->getSize(); } if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || @@ -446,7 +446,7 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) /** * @todo Add write merging here. */ - mshr = allocateWrite(pkt, blkSize, time); + mshr = allocateWrite(pkt, pkt->getSize(), time); return; } @@ -499,7 +499,7 @@ MissQueue::getPacket() pkt = prefetcher->getPacket(); if (pkt) { //Update statistic on number of prefetches issued (hwpf_mshr_misses) - mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; //It will request the bus for the future, but should clear that immedieatley allocateMiss(pkt, pkt->getSize(), curTick); pkt = mq.getReq(); @@ -515,6 +515,14 @@ MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) assert(pkt->senderState != 0); MSHR * mshr = (MSHR*)pkt->senderState; mshr->originalCmd = pkt->cmd; + if (cmd == Packet::UpgradeReq || cmd == Packet::InvalidateReq) { + pkt->flags |= NO_ALLOCATE; + pkt->flags &= ~CACHE_LINE_FILL; + } + else if (!pkt->req->isUncacheable() && !pkt->isNoAllocate() && + (cmd & (1 << 6)/*NeedsResponse*/)) { + pkt->flags |= CACHE_LINE_FILL; + } if (pkt->isCacheFill() || pkt->isNoAllocate()) pkt->cmd = cmd; } @@ -526,9 +534,8 @@ MissQueue::restoreOrigCmd(Packet * &pkt) } void -MissQueue::markInService(Packet * &pkt) +MissQueue::markInService(Packet * &pkt, MSHR* mshr) { - assert(pkt->senderState != 0); bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; @@ -540,7 +547,7 @@ MissQueue::markInService(Packet * &pkt) // Forwarding a write/ writeback, don't need to change // the command unblock = wb.isFull(); - wb.markInService((MSHR*)pkt->senderState); + wb.markInService(mshr); if (!wb.havePending()){ cache->clearMasterRequest(Request_WB); } @@ -551,11 +558,11 @@ MissQueue::markInService(Packet * &pkt) } } else { unblock = mq.isFull(); - mq.markInService((MSHR*)pkt->senderState); + mq.markInService(mshr); if (!mq.havePending()){ cache->clearMasterRequest(Request_MSHR); } - if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { + if (mshr->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in 
service\n", cache->name()); //Also clear pending if need be @@ -592,7 +599,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) BlockedCause cause = NUM_BLOCKED_CAUSES; if (pkt->isCacheFill() && !pkt->isNoAllocate()) { - mshr_miss_latency[mshr->originalCmd][pkt->req->getThreadNum()] += + mshr_miss_latency[mshr->originalCmd][0/*pkt->req->getThreadNum()*/] += curTick - pkt->time; // targets were handled in the cache tags if (mshr == noTargetMSHR) { @@ -619,7 +626,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) } } else { if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd][pkt->req->getThreadNum()] += + mshr_uncacheable_lat[pkt->cmd][0/*pkt->req->getThreadNum()*/] += curTick - pkt->time; } if (mshr->hasTargets() && pkt->req->isUncacheable()) { @@ -725,7 +732,7 @@ MissQueue::doWriteback(Addr addr, } ///All writebacks charged to same thread @todo figure this out - writebacks[pkt->req->getThreadNum()]++; + writebacks[0/*pkt->req->getThreadNum()*/]++; allocateWrite(pkt, 0, curTick); } @@ -734,7 +741,7 @@ MissQueue::doWriteback(Addr addr, void MissQueue::doWriteback(Packet * &pkt) { - writebacks[pkt->req->getThreadNum()]++; + writebacks[0/*pkt->req->getThreadNum()*/]++; allocateWrite(pkt, 0, curTick); } diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index 505d1f90c..179638d2b 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -256,7 +256,7 @@ class MissQueue * are successfully sent. * @param pkt The request that was sent on the bus. */ - void markInService(Packet * &pkt); + void markInService(Packet * &pkt, MSHR* mshr); /** * Collect statistics and free resources of a satisfied pktuest. diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 519ec5ebd..455798f15 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -88,7 +88,7 @@ void MSHR::allocateAsBuffer(Packet * &target) { addr = target->getAddr(); - threadNum = target->req->getThreadNum(); + threadNum = 0/*target->req->getThreadNum()*/; pkt = new Packet(target->req, target->cmd, -1); pkt->allocate(); pkt->senderState = (Packet::SenderState*)this; @@ -100,6 +100,7 @@ MSHR::deallocate() { assert(targets.empty()); assert(ntargets == 0); + delete pkt; pkt = NULL; inService = false; //allocIter = NULL; diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 97a56119f..1876a8987 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -128,6 +128,7 @@ MSHR* MSHRQueue::allocate(Packet * &pkt, int size) { Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1); + assert(!freeList.empty()); MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); @@ -212,8 +213,13 @@ void MSHRQueue::markInService(MSHR* mshr) { //assert(mshr == pendingList.front()); - if (!mshr->pkt->needsResponse()) { + if (!(mshr->pkt->needsResponse() || mshr->pkt->cmd == Packet::UpgradeReq)) { assert(mshr->getNumTargets() == 0); + if ((mshr->pkt->flags & SATISFIED) && (mshr->pkt->cmd == Packet::Writeback)) { + //Writeback hit, so delete it + //otherwise the consumer will delete it + delete mshr->pkt->req; + } deallocate(mshr); return; } @@ -251,7 +257,7 @@ MSHRQueue::squash(int threadNum) Packet * target = mshr->getTarget(); mshr->popTarget(); - assert(target->req->getThreadNum() == threadNum); + assert(0/*target->req->getThreadNum()*/ == threadNum); target = NULL; } assert(!mshr->hasTargets()); diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 
91298df8c..4758fda89 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -39,9 +39,18 @@ static const std::string ReadReqString("ReadReq"); static const std::string WriteReqString("WriteReq"); -static const std::string WriteReqNoAckString("WriteReqNoAck"); +static const std::string WriteReqNoAckString("WriteReqNoAck|Writeback"); static const std::string ReadRespString("ReadResp"); static const std::string WriteRespString("WriteResp"); +static const std::string SoftPFReqString("SoftPFReq"); +static const std::string SoftPFRespString("SoftPFResp"); +static const std::string HardPFReqString("HardPFReq"); +static const std::string HardPFRespString("HardPFResp"); +static const std::string InvalidateReqString("InvalidateReq"); +static const std::string WriteInvalidateReqString("WriteInvalidateReq"); +static const std::string UpgradeReqString("UpgradeReq"); +static const std::string ReadExReqString("ReadExReq"); +static const std::string ReadExRespString("ReadExResp"); static const std::string OtherCmdString("<other>"); const std::string & @@ -53,6 +62,15 @@ Packet::cmdString() const case WriteReqNoAck: return WriteReqNoAckString; case ReadResp: return ReadRespString; case WriteResp: return WriteRespString; + case SoftPFReq: return SoftPFReqString; + case SoftPFResp: return SoftPFRespString; + case HardPFReq: return HardPFReqString; + case HardPFResp: return HardPFRespString; + case InvalidateReq: return InvalidateReqString; + case WriteInvalidateReq:return WriteInvalidateReqString; + case UpgradeReq: return UpgradeReqString; + case ReadExReq: return ReadExReqString; + case ReadExResp: return ReadExRespString; default: return OtherCmdString; } } @@ -66,6 +84,15 @@ Packet::cmdIdxToString(Packet::Command idx) case WriteReqNoAck: return WriteReqNoAckString; case ReadResp: return ReadRespString; case WriteResp: return WriteRespString; + case SoftPFReq: return SoftPFReqString; + case SoftPFResp: return SoftPFRespString; + case HardPFReq: return HardPFReqString; + case HardPFResp: return HardPFRespString; + case InvalidateReq: return InvalidateReqString; + case WriteInvalidateReq:return WriteInvalidateReqString; + case UpgradeReq: return UpgradeReqString; + case ReadExReq: return ReadExReqString; + case ReadExResp: return ReadExRespString; default: return OtherCmdString; } } @@ -102,15 +129,11 @@ bool Packet::intersect(Packet *p) { Addr s1 = getAddr(); - Addr e1 = getAddr() + getSize(); + Addr e1 = getAddr() + getSize() - 1; Addr s2 = p->getAddr(); - Addr e2 = p->getAddr() + p->getSize(); + Addr e2 = p->getAddr() + p->getSize() - 1; - if (s1 >= s2 && s1 < e2) - return true; - if (e1 >= s2 && e1 < e2) - return true; - return false; + return !(s1 > e2 || e1 < s2); } bool diff --git a/src/mem/packet.hh b/src/mem/packet.hh index c7d28010c..7ede48bfd 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -58,10 +58,8 @@ typedef std::list<PacketPtr> PacketList; #define NO_ALLOCATE 1 << 5 #define SNOOP_COMMIT 1 << 6 -//For statistics we need max number of commands, hard code it at -//20 for now. @todo fix later -#define NUM_MEM_CMDS 1 << 9 - +//for now. @todo fix later +#define NUM_MEM_CMDS 1 << 11 /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -94,7 +92,6 @@ class Packet * be called on it rather than simply delete.*/ bool arrayData; - /** The address of the request. This address could be virtual or * physical, depending on the system configuration. 
*/ Addr addr; @@ -126,6 +123,12 @@ class Packet /** Used to calculate latencies for each packet.*/ Tick time; + /** The time at which the packet will be fully transmitted */ + Tick finishTime; + + /** The time at which the first chunk of the packet will be transmitted */ + Tick firstWordTime; + /** The special destination address indicating that the packet * should be routed based on its address. */ static const short Broadcast = -1; @@ -164,6 +167,8 @@ class Packet private: /** List of command attributes. */ + // If you add a new CommandAttribute, make sure to increase NUM_MEM_CMDS + // as well. enum CommandAttribute { IsRead = 1 << 0, @@ -174,7 +179,9 @@ class Packet IsResponse = 1 << 5, NeedsResponse = 1 << 6, IsSWPrefetch = 1 << 7, - IsHWPrefetch = 1 << 8 + IsHWPrefetch = 1 << 8, + IsUpgrade = 1 << 9, + HasData = 1 << 10 }; public: @@ -183,21 +190,23 @@ class Packet { InvalidCmd = 0, ReadReq = IsRead | IsRequest | NeedsResponse, - WriteReq = IsWrite | IsRequest | NeedsResponse, - WriteReqNoAck = IsWrite | IsRequest, - ReadResp = IsRead | IsResponse | NeedsResponse, + WriteReq = IsWrite | IsRequest | NeedsResponse | HasData, + WriteReqNoAck = IsWrite | IsRequest | HasData, + ReadResp = IsRead | IsResponse | NeedsResponse | HasData, WriteResp = IsWrite | IsResponse | NeedsResponse, - Writeback = IsWrite | IsRequest, + Writeback = IsWrite | IsRequest | HasData, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, - SoftPFResp = IsRead | IsResponse | IsSWPrefetch | NeedsResponse, - HardPFResp = IsRead | IsResponse | IsHWPrefetch | NeedsResponse, + SoftPFResp = IsRead | IsResponse | IsSWPrefetch + | NeedsResponse | HasData, + HardPFResp = IsRead | IsResponse | IsHWPrefetch + | NeedsResponse | HasData, InvalidateReq = IsInvalidate | IsRequest, - WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | IsRequest | NeedsResponse, - UpgradeResp = IsInvalidate | IsResponse | NeedsResponse, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest | HasData, + UpgradeReq = IsInvalidate | IsRequest | IsUpgrade, ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, - ReadExResp = IsRead | IsInvalidate | IsResponse | NeedsResponse + ReadExResp = IsRead | IsInvalidate | IsResponse + | NeedsResponse | HasData }; /** Return the string name of the cmd field (for debugging and @@ -219,6 +228,7 @@ class Packet bool isResponse() { return (cmd & IsResponse) != 0; } bool needsResponse() { return (cmd & NeedsResponse) != 0; } bool isInvalidate() { return (cmd & IsInvalidate) != 0; } + bool hasData() { return (cmd & HasData) != 0; } bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } @@ -312,7 +322,7 @@ class Packet * for returning as a response to that request. Used for timing * accesses only. For atomic and functional accesses, the * request packet is always implicitly passed back *without* - * modifying the command or destination fields, so this function + * modifying the destination fields, so this function * should not be called. */ void makeTimingResponse() { assert(needsResponse()); @@ -320,11 +330,31 @@ class Packet int icmd = (int)cmd; icmd &= ~(IsRequest); icmd |= IsResponse; + if (isRead()) + icmd |= HasData; + if (isWrite()) + icmd &= ~HasData; cmd = (Command)icmd; dest = src; srcValid = false; } + /** Take a request packet and modify it in place to be suitable + * for returning as a response to that request. 
+ */ + void makeAtomicResponse() { + assert(needsResponse()); + assert(isRequest()); + int icmd = (int)cmd; + icmd &= ~(IsRequest); + icmd |= IsResponse; + if (isRead()) + icmd |= HasData; + if (isWrite()) + icmd &= ~HasData; + cmd = (Command)icmd; + } + /** Take a request packet that has been returned as NACKED and modify it so * that it can be sent out again. Only packets that need a response can be * NACKED, so verify that that is true. */ diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 8fea733ec..7303f278e 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -110,28 +110,112 @@ PhysicalMemory::calculateLatency(Packet *pkt) return lat; } + + +// Add load-locked to tracking list. Should only be called if the +// operation is a load and the LOCKED flag is set. +void +PhysicalMemory::trackLoadLocked(Request *req) +{ + Addr paddr = LockedAddr::mask(req->getPaddr()); + + // first we check if we already have a locked addr for this + // xc. Since each xc only gets one, we just update the + // existing record with the new address. + list<LockedAddr>::iterator i; + + for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) { + if (i->matchesContext(req)) { + DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n", + req->getCpuNum(), req->getThreadNum(), paddr); + i->addr = paddr; + return; + } + } + + // no record for this xc: need to allocate a new one + DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n", + req->getCpuNum(), req->getThreadNum(), paddr); + lockedAddrList.push_front(LockedAddr(req)); +} + + +// Called on *writes* only... both regular stores and +// store-conditional operations. Check for conventional stores which +// conflict with locked addresses, and for success/failure of store +// conditionals. +bool +PhysicalMemory::checkLockedAddrList(Request *req) +{ + Addr paddr = LockedAddr::mask(req->getPaddr()); + bool isLocked = req->isLocked(); + + // Initialize return value. Non-conditional stores always + // succeed. Assume conditional stores will fail until proven + // otherwise. + bool success = !isLocked; + + // Iterate over list. Note that there could be multiple matching + // records, as more than one context could have done a load locked + // to this location. + list<LockedAddr>::iterator i = lockedAddrList.begin(); + + while (i != lockedAddrList.end()) { + + if (i->addr == paddr) { + // we have a matching address + + if (isLocked && i->matchesContext(req)) { + // it's a store conditional, and as far as the memory + // system can tell, the requesting context's lock is + // still valid. + DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n", + req->getCpuNum(), req->getThreadNum(), paddr); + success = true; + } + + // Get rid of our record of this lock and advance to next + DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n", + i->cpuNum, i->threadNum, paddr); + i = lockedAddrList.erase(i); + } + else { + // no match: advance to next record + ++i; + } + } + + if (isLocked) { + req->setScResult(success ? 
1 : 0); + } + + return success; +} + void PhysicalMemory::doFunctionalAccess(Packet *pkt) { - assert(pkt->getAddr() + pkt->getSize() < params()->addrRange.size()); + assert(pkt->getAddr() + pkt->getSize() <= params()->addrRange.size()); - switch (pkt->cmd) { - case Packet::ReadReq: + if (pkt->isRead()) { + if (pkt->req->isLocked()) { + trackLoadLocked(pkt->req); + } memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - params()->addrRange.start, pkt->getSize()); - break; - case Packet::WriteReq: - memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start, - pkt->getPtr<uint8_t>(), - pkt->getSize()); - // temporary hack: will need to add real LL/SC implementation - // for cacheless systems later. - if (pkt->req->getFlags() & LOCKED) { - pkt->req->setScResult(1); + } + else if (pkt->isWrite()) { + if (writeOK(pkt->req)) { + memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start, + pkt->getPtr<uint8_t>(), pkt->getSize()); } - break; - default: + } + else if (pkt->isInvalidate()) { + //upgrade or invalidate + pkt->flags |= SATISFIED; + } + else { panic("unimplemented"); } @@ -147,7 +231,7 @@ PhysicalMemory::getPort(const std::string &if_name, int idx) port = new MemoryPort(name() + "-port", this); return port; } else if (if_name == "functional") { - /* special port for functional writes at startup. */ + /* special port for functional writes at startup. And for memtester */ return new MemoryPort(name() + "-funcport", this); } else { panic("PhysicalMemory::getPort: unknown port %s requested", if_name); diff --git a/src/mem/physical.hh b/src/mem/physical.hh index 02308b2ef..97bea2ec4 100644 --- a/src/mem/physical.hh +++ b/src/mem/physical.hh @@ -78,6 +78,68 @@ class PhysicalMemory : public MemObject const PhysicalMemory &operator=(const PhysicalMemory &specmem); protected: + + class LockedAddr { + public: + // on alpha, minimum LL/SC granularity is 16 bytes, so lower + // bits need to masked off. + static const Addr Addr_Mask = 0xf; + + static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); } + + Addr addr; // locked address + int cpuNum; // locking CPU + int threadNum; // locking thread ID within CPU + + // check for matching execution context + bool matchesContext(Request *req) + { + return (cpuNum == req->getCpuNum() && + threadNum == req->getThreadNum()); + } + + LockedAddr(Request *req) + : addr(mask(req->getPaddr())), + cpuNum(req->getCpuNum()), + threadNum(req->getThreadNum()) + { + } + }; + + std::list<LockedAddr> lockedAddrList; + + // helper function for checkLockedAddrs(): we really want to + // inline a quick check for an empty locked addr list (hopefully + // the common case), and do the full list search (if necessary) in + // this out-of-line function + bool checkLockedAddrList(Request *req); + + // Record the address of a load-locked operation so that we can + // clear the execution context's lock flag if a matching store is + // performed + void trackLoadLocked(Request *req); + + // Compare a store address with any locked addresses so we can + // clear the lock flag appropriately. Return value set to 'false' + // if store operation should be suppressed (because it was a + // conditional store and the address was no longer locked by the + // requesting execution context), 'true' otherwise. Note that + // this method must be called on *all* stores since even + // non-conditional stores must clear any matching lock addresses. 
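// [Annotation, not part of the patch:] taken together, trackLoadLocked() and
// checkLockedAddrList() give writeOK() this behavior, assuming context C
// holds a lock record for block B and context C' does not:
//
//   plain store to B          -> returns true;  every record for B is erased
//   store-conditional by C    -> returns true;  ScResult = 1; records erased
//   store-conditional by C'   -> returns false; ScResult = 0; C's record for
//                                B is erased anyway
//   any SC, empty lock list   -> returns false; ScResult = 0
//
// Plain stores always succeed but still invalidate matching locks, which is
// why writeOK() must be consulted on every store, not only on conditionals.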
+ bool writeOK(Request *req) { + if (lockedAddrList.empty()) { + // no locked addrs: nothing to check, store_conditional fails + bool isLocked = req->isLocked(); + if (isLocked) { + req->setScResult(0); + } + return !isLocked; // only do write if not an sc + } else { + // iterate over list... + return checkLockedAddrList(req); + } + } + uint8_t *pmemAddr; MemoryPort *port; int pagePtr; diff --git a/src/mem/port.hh b/src/mem/port.hh index 6b4184043..bb3bc1b1b 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -106,8 +106,7 @@ class Port /** Holds the ports status. Currently just that a range recomputation needs * to be done. */ enum Status { - RangeChange, - SnoopSquash + RangeChange }; void setName(const std::string &name) diff --git a/src/mem/request.hh b/src/mem/request.hh index 6acd7526c..e54984fcd 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -232,9 +232,11 @@ class Request Addr getPC() { assert(validPC); return pc; } /** Accessor Function to Check Cacheability. */ - bool isUncacheable() { return getFlags() & UNCACHEABLE; } + bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; } - bool isInstRead() { return getFlags() & INST_READ; } + bool isInstRead() { return (getFlags() & INST_READ) != 0; } + + bool isLocked() { return (getFlags() & LOCKED) != 0; } friend class Packet; }; diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 55c301c87..456878d0a 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -47,22 +47,28 @@ SimpleTimingPort::recvTiming(Packet *pkt) // if we ever added it back. assert(pkt->result != Packet::Nacked); Tick latency = recvAtomic(pkt); - // turn packet around to go back to requester - pkt->makeTimingResponse(); - sendTimingLater(pkt, latency); + // turn packet around to go back to requester if response expected + if (pkt->needsResponse()) { + pkt->makeTimingResponse(); + sendTimingLater(pkt, latency); + } return true; } void SimpleTimingPort::recvRetry() { - bool result = true; - while (result && transmitList.size()) { - result = sendTiming(transmitList.front()); - if (result) - transmitList.pop_front(); + assert(outTiming > 0); + assert(!transmitList.empty()); + if (sendTiming(transmitList.front())) { + transmitList.pop_front(); + outTiming--; + DPRINTF(Bus, "No Longer waiting on retry\n"); + if (!transmitList.empty()) + sendTimingLater(transmitList.front(), 1); } - if (transmitList.size() == 0 && drainEvent) { + + if (transmitList.empty() && drainEvent) { drainEvent->process(); drainEvent = NULL; } @@ -71,18 +77,28 @@ SimpleTimingPort::recvRetry() void SimpleTimingPort::SendEvent::process() { - port->outTiming--; - assert(port->outTiming >= 0); - if (port->sendTiming(packet)) { - // send successfule - if (port->transmitList.size() == 0 && port->drainEvent) { + assert(port->outTiming > 0); + if (!port->transmitList.empty() && port->transmitList.front() != packet) { + //We are not the head of the list + port->transmitList.push_back(packet); + } else if (port->sendTiming(packet)) { + // send successful + if (port->transmitList.size()) { + port->transmitList.pop_front(); + port->outTiming--; + if (!port->transmitList.empty()) + port->sendTimingLater(port->transmitList.front(), 1); + } + if (port->transmitList.empty() && port->drainEvent) { port->drainEvent->process(); port->drainEvent = NULL; } } else { // send unsuccessful (due to flow control). Will get retry - // callback later; save for then. 
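// [Annotation, not part of the patch:] on a failed send the packet may
// already be sitting at the head of transmitList (queued when the event
// fired while the list was non-empty), so the code below only appends it
// when it is not the current head; an unconditional push_back could enqueue
// the same packet twice and scramble the FIFO order the retry logic relies
// on: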
- port->transmitList.push_back(packet); + // callback later; save for then if not already + DPRINTF(Bus, "Waiting on retry\n"); + if (!(port->transmitList.front() == packet)) + port->transmitList.push_back(packet); } } diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py index 0b887cceb..b6dc08e46 100644 --- a/src/python/m5/objects/BaseCPU.py +++ b/src/python/m5/objects/BaseCPU.py @@ -11,10 +11,11 @@ class BaseCPU(SimObject): mem = Param.MemObject("memory") system = Param.System(Parent.any, "system object") + cpu_id = Param.Int("CPU identifier") + if build_env['FULL_SYSTEM']: dtb = Param.AlphaDTB(AlphaDTB(), "Data TLB") itb = Param.AlphaITB(AlphaITB(), "Instruction TLB") - cpu_id = Param.Int(-1, "CPU identifier") else: workload = VectorParam.Process("processes to run") diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py index f6828a0d5..6710111e5 100644 --- a/src/python/m5/objects/Bus.py +++ b/src/python/m5/objects/Bus.py @@ -6,3 +6,5 @@ class Bus(MemObject): port = VectorPort("vector port for connecting devices") default = Port("Default port for requests that aren't handeled by a device.") bus_id = Param.Int(0, "blah") + clock = Param.Clock("1GHz", "bus clock speed") + width = Param.Int(64, "bus width (bytes)") diff --git a/src/python/m5/objects/FUPool.py b/src/python/m5/objects/FUPool.py index 4b4be79a6..916183bd7 100644 --- a/src/python/m5/objects/FUPool.py +++ b/src/python/m5/objects/FUPool.py @@ -1,6 +1,12 @@ from m5.SimObject import SimObject from m5.params import * +from FuncUnit import * +from FuncUnitConfig import * class FUPool(SimObject): type = 'FUPool' FUList = VectorParam.FUDesc("list of FU's for this pool") + +class DefaultFUPool(FUPool): + FUList = [ IntALU(), IntMultDiv(), FP_ALU(), FP_MultDiv(), ReadPort(), + WritePort(), RdWrPort(), IprPort() ] diff --git a/src/python/m5/objects/FuncUnitConfig.py b/src/python/m5/objects/FuncUnitConfig.py new file mode 100644 index 000000000..43d7a4bb7 --- /dev/null +++ b/src/python/m5/objects/FuncUnitConfig.py @@ -0,0 +1,41 @@ +from m5.SimObject import SimObject +from m5.params import * +from FuncUnit import * + +class IntALU(FUDesc): + opList = [ OpDesc(opClass='IntAlu') ] + count = 6 + +class IntMultDiv(FUDesc): + opList = [ OpDesc(opClass='IntMult', opLat=3), + OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ] + count=2 + +class FP_ALU(FUDesc): + opList = [ OpDesc(opClass='FloatAdd', opLat=2), + OpDesc(opClass='FloatCmp', opLat=2), + OpDesc(opClass='FloatCvt', opLat=2) ] + count = 4 + +class FP_MultDiv(FUDesc): + opList = [ OpDesc(opClass='FloatMult', opLat=4), + OpDesc(opClass='FloatDiv', opLat=12, issueLat=12), + OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ] + count = 2 + +class ReadPort(FUDesc): + opList = [ OpDesc(opClass='MemRead') ] + count = 0 + +class WritePort(FUDesc): + opList = [ OpDesc(opClass='MemWrite') ] + count = 0 + +class RdWrPort(FUDesc): + opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='MemWrite') ] + count = 4 + +class IprPort(FUDesc): + opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ] + count = 1 + diff --git a/src/python/m5/objects/MemTest.py b/src/python/m5/objects/MemTest.py index 97600768f..83399be80 100644 --- a/src/python/m5/objects/MemTest.py +++ b/src/python/m5/objects/MemTest.py @@ -1,13 +1,13 @@ from m5.SimObject import SimObject from m5.params import * +from m5.proxy import * +from m5 import build_env + class MemTest(SimObject): type = 'MemTest' - cache = Param.BaseCache("L1 cache") - check_mem = 
Param.FunctionalMemory("check memory") - main_mem = Param.FunctionalMemory("hierarchical memory") max_loads = Param.Counter("number of loads to execute") + atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n") memory_size = Param.Int(65536, "memory size") - percent_copies = Param.Percent(0, "target copy percentage") percent_dest_unaligned = Param.Percent(50, "percent of copy dest address that are unaligned") percent_reads = Param.Percent(65, "target read percentage") @@ -18,3 +18,6 @@ class MemTest(SimObject): progress_interval = Param.Counter(1000000, "progress report interval (in accesses)") trace_addr = Param.Addr(0, "address to trace") + + test = Port("Port to the memory system to test") + functional = Port("Port to the functional memory used for verification") diff --git a/src/python/m5/objects/O3CPU.py b/src/python/m5/objects/O3CPU.py index 59b40c6e8..20eef383f 100644 --- a/src/python/m5/objects/O3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -3,6 +3,7 @@ from m5.proxy import * from m5 import build_env from BaseCPU import BaseCPU from Checker import O3Checker +from FUPool import * class DerivO3CPU(BaseCPU): type = 'DerivO3CPU' @@ -14,11 +15,13 @@ class DerivO3CPU(BaseCPU): if build_env['USE_CHECKER']: if not build_env['FULL_SYSTEM']: checker = Param.BaseCPU(O3Checker(workload=Parent.workload, - exitOnError=True, + exitOnError=False, + updateOnError=True, warnOnlyOnLoadError=False), "checker") else: - checker = Param.BaseCPU(O3Checker(exitOnError=True, warnOnlyOnLoadError=False), "checker") + checker = Param.BaseCPU(O3Checker(exitOnError=False, updateOnError=True, + warnOnlyOnLoadError=False), "checker") checker.itb = Parent.itb checker.dtb = Parent.dtb @@ -57,7 +60,7 @@ class DerivO3CPU(BaseCPU): issueWidth = Param.Unsigned(8, "Issue width") wbWidth = Param.Unsigned(8, "Writeback width") wbDepth = Param.Unsigned(1, "Writeback depth") - fuPool = Param.FUPool("Functional Unit pool") + fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool") iewToCommitDelay = Param.Unsigned(1, "Issue/Execute/Writeback to commit " "delay") @@ -77,7 +80,7 @@ class DerivO3CPU(BaseCPU): localHistoryBits = Param.Unsigned(11, "Bits for the local history") globalPredictorSize = Param.Unsigned(8192, "Size of global predictor") globalCtrBits = Param.Unsigned(2, "Bits per counter") - globalHistoryBits = Param.Unsigned(4096, "Bits of history") + globalHistoryBits = Param.Unsigned(13, "Bits of history") choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") diff --git a/src/python/m5/objects/PhysicalMemory.py b/src/python/m5/objects/PhysicalMemory.py index dd3ffd651..4e097543d 100644 --- a/src/python/m5/objects/PhysicalMemory.py +++ b/src/python/m5/objects/PhysicalMemory.py @@ -5,6 +5,7 @@ from MemObject import * class PhysicalMemory(MemObject): type = 'PhysicalMemory' port = Port("the access port") + functional = Port("Functional Access Port") range = Param.AddrRange(AddrRange('128MB'), "Device Address") file = Param.String('', "memory mapped file") latency = Param.Latency(Parent.clock, "latency of an access") diff --git a/src/python/m5/params.py b/src/python/m5/params.py index cbbd23004..93d784181 100644 --- a/src/python/m5/params.py +++ b/src/python/m5/params.py @@ -804,7 +804,7 @@ class PortRef(object): newRef.simobj = simobj assert(isSimObject(newRef.simobj)) if self.peer and not proxy.isproxy(self.peer): - peerObj = memo[self.peer.simobj] + peerObj = self.peer.simobj(_memo=memo) newRef.peer = 
self.peer.clone(peerObj, memo) assert(not isinstance(newRef.peer, VectorPortRef)) return newRef diff --git a/src/python/m5/proxy.py b/src/python/m5/proxy.py index 7ebc0ae19..e539f14ee 100644 --- a/src/python/m5/proxy.py +++ b/src/python/m5/proxy.py @@ -33,6 +33,8 @@ # ##################################################################### +import copy + class BaseProxy(object): def __init__(self, search_self, search_up): self._search_self = search_self @@ -129,15 +131,22 @@ class AttrProxy(BaseProxy): return super(AttrProxy, self).__getattr__(self, attr) if hasattr(self, '_pdesc'): raise AttributeError, "Attribute reference on bound proxy" - self._modifiers.append(attr) - return self + # Return a copy of self rather than modifying self in place + # since self could be an indirect reference via a variable or + # parameter + new_self = copy.deepcopy(self) + new_self._modifiers.append(attr) + return new_self # support indexing on proxies (e.g., Self.cpu[0]) def __getitem__(self, key): if not isinstance(key, int): raise TypeError, "Proxy object requires integer index" - self._modifiers.append(key) - return self + if hasattr(self, '_pdesc'): + raise AttributeError, "Index operation on bound proxy" + new_self = copy.deepcopy(self) + new_self._modifiers.append(key) + return new_self def find(self, obj): try: diff --git a/src/sim/main.cc b/src/sim/main.cc index 5725897f8..874d0ac85 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -317,8 +317,8 @@ simulate(Tick num_cycles = -1) else num_cycles = curTick + num_cycles; - Event *limit_event = new SimLoopExitEvent(num_cycles, - "simulate() limit reached"); + Event *limit_event = schedExitSimLoop("simulate() limit reached", + num_cycles); while (1) { // there should always be at least one event (the SimLoopExitEvent @@ -414,7 +414,12 @@ unserializeAll(const std::string &cpt_dir) /** * Queue of C++ callbacks to invoke on simulator exit. */ -CallbackQueue exitCallbacks; +CallbackQueue& +exitCallbacks() +{ + static CallbackQueue theQueue; + return theQueue; +} /** * Register an exit callback. 
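// [Annotation, not part of the patch:] turning the global CallbackQueue into
// an accessor is the construct-on-first-use idiom:
//
//   CallbackQueue &exitCallbacks() {
//       static CallbackQueue theQueue;   // constructed on first call
//       return theQueue;
//   }
//
// A function-local static is initialized before its first use, so
// registerExitCallback() is safe to call from another translation unit's
// static initializers, where an ordinary global queue might not have been
// constructed yet.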
@@ -422,7 +427,7 @@ CallbackQueue exitCallbacks; void registerExitCallback(Callback *callback) { - exitCallbacks.add(callback); + exitCallbacks().add(callback); } BaseCPU * @@ -442,8 +447,8 @@ convertToBaseCPUPtr(SimObject *obj) void doExitCleanup() { - exitCallbacks.process(); - exitCallbacks.clear(); + exitCallbacks().process(); + exitCallbacks().clear(); cout.flush(); diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index b66c78b2c..addf897c6 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -138,14 +138,14 @@ namespace AlphaPseudo void m5exit_old(ThreadContext *tc) { - exitSimLoop(curTick, "m5_exit_old instruction encountered"); + exitSimLoop("m5_exit_old instruction encountered"); } void m5exit(ThreadContext *tc, Tick delay) { Tick when = curTick + delay * Clock::Int::ns; - exitSimLoop(when, "m5_exit instruction encountered"); + schedExitSimLoop("m5_exit instruction encountered", when); } void @@ -270,7 +270,11 @@ namespace AlphaPseudo { if (!doCheckpointInsts) return; - exitSimLoop("checkpoint"); + + Tick when = curTick + delay * Clock::Int::ns; + Tick repeat = period * Clock::Int::ns; + + schedExitSimLoop("checkpoint", when, repeat); } uint64_t diff --git a/src/sim/root.cc b/src/sim/root.cc index ec5e2f7e2..565b57269 100644 --- a/src/sim/root.cc +++ b/src/sim/root.cc @@ -100,7 +100,7 @@ void Root::startup() { if (max_tick != 0) - exitSimLoop(curTick + max_tick, "reached maximum cycle count"); + schedExitSimLoop("reached maximum cycle count", curTick + max_tick); if (progress_interval != 0) new ProgressEvent(&mainEventQueue, progress_interval); diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc index d9e8bdeaa..2ccc9dad2 100644 --- a/src/sim/sim_events.cc +++ b/src/sim/sim_events.cc @@ -57,6 +57,11 @@ SimLoopExitEvent::process() // otherwise do nothing... the IsExitEvent flag takes care of // exiting the simulation loop and returning this object to Python + + // but if you are doing this on intervals, don't forget to make another + if (repeat) { + schedule(curTick + repeat); + } } @@ -66,16 +71,20 @@ SimLoopExitEvent::description() return "simulation loop exit"; } -void -exitSimLoop(Tick when, const std::string &message, int exit_code) +SimLoopExitEvent * +schedExitSimLoop(const std::string &message, Tick when, Tick repeat, + EventQueue *q, int exit_code) { - new SimLoopExitEvent(when, message, exit_code); + if (q == NULL) + q = &mainEventQueue; + + return new SimLoopExitEvent(q, when, repeat, message, exit_code); } void exitSimLoop(const std::string &message, int exit_code) { - exitSimLoop(curTick, message, exit_code); + schedExitSimLoop(message, curTick, 0, NULL, exit_code); } void diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 3c4a9dd05..e1576b38c 100644 --- a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -42,6 +42,7 @@ class SimLoopExitEvent : public Event // string explaining why we're terminating std::string cause; int code; + Tick repeat; public: // Default constructor. Only really used for derived classes. 
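// [Annotation, not part of the patch:] the new 'repeat' field makes
// SimLoopExitEvent optionally periodic: process() re-arms the event at
// curTick + repeat instead of firing once. The pseudo-instruction change
// above uses exactly this for interval checkpointing:
//
//   Tick when   = curTick + delay * Clock::Int::ns;
//   Tick repeat = period * Clock::Int::ns;
//   schedExitSimLoop("checkpoint", when, repeat);
//
// Every expiry drops back to Python with cause "checkpoint", and the event
// then reschedules itself for the next period.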
@@ -49,16 +50,19 @@ class SimLoopExitEvent : public Event : Event(&mainEventQueue, Sim_Exit_Pri) { } - SimLoopExitEvent(Tick _when, const std::string &_cause, int c = 0) - : Event(&mainEventQueue, Sim_Exit_Pri), cause(_cause), - code(c) - { setFlags(IsExitEvent); schedule(_when); } - SimLoopExitEvent(EventQueue *q, - Tick _when, const std::string &_cause, int c = 0) - : Event(q, Sim_Exit_Pri), cause(_cause), code(c) + Tick _when, Tick _repeat, const std::string &_cause, + int c = 0) + : Event(q, Sim_Exit_Pri), cause(_cause), + code(c), repeat(_repeat) { setFlags(IsExitEvent); schedule(_when); } +// SimLoopExitEvent(EventQueue *q, +// Tick _when, const std::string &_cause, +// Tick _repeat = 0, int c = 0) +// : Event(q, Sim_Exit_Pri), cause(_cause), code(c), repeat(_repeat) +// { setFlags(IsExitEvent); schedule(_when); } + std::string getCause() { return cause; } int getCode() { return code; } diff --git a/src/sim/sim_exit.hh b/src/sim/sim_exit.hh index 545bf4ae0..d4b31d1ea 100644 --- a/src/sim/sim_exit.hh +++ b/src/sim/sim_exit.hh @@ -38,6 +38,8 @@ // forward declaration class Callback; +class EventQueue; +class SimLoopExitEvent; /// Register a callback to be called when Python exits. Defined in /// sim/main.cc. @@ -47,12 +49,14 @@ void registerExitCallback(Callback *); /// Python) at the indicated tick. The message and exit_code /// parameters are saved in the SimLoopExitEvent to indicate why the /// exit occurred. -void exitSimLoop(Tick when, const std::string &message, int exit_code = 0); +SimLoopExitEvent *schedExitSimLoop(const std::string &message, Tick when, + Tick repeat = 0, EventQueue *q = NULL, + int exit_code = 0); /// Schedule an event to exit the simulation loop (returning to /// Python) at the end of the current cycle (curTick). The message /// and exit_code parameters are saved in the SimLoopExitEvent to /// indicate why the exit occurred. -void exitSimLoop(const std::string &cause, int exit_code = 0); +void exitSimLoop(const std::string &message, int exit_code = 0); #endif // __SIM_EXIT_HH__ diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc index dfed2a0c8..3fad8beb5 100644 --- a/src/sim/stat_control.cc +++ b/src/sim/stat_control.cc @@ -186,7 +186,7 @@ StatEvent::process() DumpNow(); if (flags & Stats::Reset) { - cprintf("Resetting stats!\n"); + cprintf("Resetting stats at cycle %d!\n", curTick); reset(); } diff --git a/src/sim/system.cc b/src/sim/system.cc index ad70b9b03..11ae492b9 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -219,6 +219,8 @@ System::new_page() { Addr return_addr = page_ptr << LogVMPageSize; ++page_ptr; + if (return_addr >= physmem->size()) + fatal("Out of memory, please increase size of physical memory."); return return_addr; } #endif diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py new file mode 100644 index 000000000..116e71af6 --- /dev/null +++ b/tests/configs/memtest.py @@ -0,0 +1,95 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 12 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol='moesi') + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 10 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +#MAX CORES IS 8 with the fals sharing method +nb_cores = 8 +cpus = [ MemTest(max_loads=1e12, percent_uncacheable=0, progress_interval=1000) for i in xrange(nb_cores) ] + +# system simulated +system = System(cpu = cpus, funcmem = PhysicalMemory(), + physmem = PhysicalMemory(), membus = Bus(clock="500GHz", width=16)) + +# l2cache & bus +system.toL2Bus = Bus(clock="500GHz", width=16) +system.l2c = L2(size='64kB', assoc=8) +system.l2c.cpu_side = system.toL2Bus.port + +# connect l2c to membus +system.l2c.mem_side = system.membus.port + +which_port = 0 +# add L1 caches +for cpu in cpus: + cpu.l1c = L1(size = '32kB', assoc = 4) + cpu.l1c.cpu_side = cpu.test + cpu.l1c.mem_side = system.toL2Bus.port + if which_port == 0: + system.funcmem.port = cpu.functional + which_port = 1 + else: + system.funcmem.functional = cpu.functional + + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +root.system.mem_mode = 'timing' +#root.trace.flags="Cache CachePort Bus" +#root.trace.cycle=3810800 + diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py new file mode 100644 index 000000000..68631b3d2 --- /dev/null +++ b/tests/configs/o3-timing-mp.py @@ -0,0 +1,89 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * +m5.AddToPath('../configs/common') + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 4 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol='moesi') + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 100 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +nb_cores = 4 +cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(nb_cores) ] + +# system simulated +system = System(cpu = cpus, physmem = PhysicalMemory(), membus = +Bus()) + +# l2cache & bus +system.toL2Bus = Bus() +system.l2c = L2(size='4MB', assoc=8) +system.l2c.cpu_side = system.toL2Bus.port + +# connect l2c to membus +system.l2c.mem_side = system.membus.port + +# add L1 caches +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = '32kB', assoc = 1), + L1(size = '32kB', assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2Bus) + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +root.system.mem_mode = 'timing' +#root.trace.flags="Bus Cache" +#root.trace.flags = "BusAddrRanges" diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py index 227e1ba21..0dd7be506 100644 --- a/tests/configs/o3-timing.py +++ b/tests/configs/o3-timing.py @@ -29,7 +29,6 @@ import m5 from m5.objects import * m5.AddToPath('../configs/common') -from FullO3Config import * class MyCache(BaseCache): assoc = 2 @@ -38,7 +37,7 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 -cpu = DetailedO3CPU() +cpu = DerivO3CPU() cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), MyCache(size = '2MB')) cpu.mem = cpu.dcache diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py new file mode 100644 index 
000000000..eaa6ec66e --- /dev/null +++ b/tests/configs/simple-atomic-mp.py @@ -0,0 +1,86 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 4 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol='moesi') + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 100 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +nb_cores = 4 +cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(nb_cores) ] + +# system simulated +system = System(cpu = cpus, physmem = PhysicalMemory(range = AddrRange('1024MB')), membus = +Bus()) + +# l2cache & bus +system.toL2Bus = Bus() +system.l2c = L2(size='4MB', assoc=8) +system.l2c.cpu_side = system.toL2Bus.port + +# connect l2c to membus +system.l2c.mem_side = system.membus.port + +# add L1 caches +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = '32kB', assoc = 1), + L1(size = '32kB', assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2Bus) + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +root.system.mem_mode = 'atomic' diff --git a/tests/configs/simple-atomic.py b/tests/configs/simple-atomic.py index 2bf67f3b1..d35ac4ae0 100644 --- a/tests/configs/simple-atomic.py +++ b/tests/configs/simple-atomic.py @@ -29,7 +29,7 @@ import m5 from m5.objects import * -system = System(cpu = AtomicSimpleCPU(), +system = System(cpu = AtomicSimpleCPU(cpu_id=0), physmem = PhysicalMemory(), membus = Bus()) system.physmem.port = system.membus.port diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py new file mode 100644 index 000000000..8f9ab0dde --- /dev/null +++ 
b/tests/configs/simple-timing-mp.py @@ -0,0 +1,86 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 4 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol='moesi') + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 100 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +nb_cores = 4 +cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(nb_cores) ] + +# system simulated +system = System(cpu = cpus, physmem = PhysicalMemory(), membus = +Bus()) + +# l2cache & bus +system.toL2Bus = Bus() +system.l2c = L2(size='4MB', assoc=8) +system.l2c.cpu_side = system.toL2Bus.port + +# connect l2c to membus +system.l2c.mem_side = system.membus.port + +# add L1 caches +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = '32kB', assoc = 1), + L1(size = '32kB', assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2Bus) + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +root.system.mem_mode = 'timing' diff --git a/tests/configs/simple-timing.py b/tests/configs/simple-timing.py index 7bb76db0e..60190b47c 100644 --- a/tests/configs/simple-timing.py +++ b/tests/configs/simple-timing.py @@ -36,7 +36,7 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 -cpu = TimingSimpleCPU() +cpu = TimingSimpleCPU(cpu_id=0) cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), MyCache(size = '2MB')) cpu.mem = cpu.dcache diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index e3945f7dc..f798213db 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ 
b/tests/configs/tsunami-simple-atomic-dual.py @@ -34,7 +34,7 @@ import FSConfig AlphaConsole.cpu = Parent.cpu[0] IntrControl.cpu = Parent.cpu[0] -cpus = [ AtomicSimpleCPU() for i in xrange(2) ] +cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ] system = FSConfig.makeLinuxAlphaSystem('atomic') system.cpu = cpus for c in cpus: diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py index ca1dd5c77..623d285e4 100644 --- a/tests/configs/tsunami-simple-atomic.py +++ b/tests/configs/tsunami-simple-atomic.py @@ -31,7 +31,7 @@ from m5.objects import * m5.AddToPath('../configs/common') import FSConfig -cpu = AtomicSimpleCPU() +cpu = AtomicSimpleCPU(cpu_id=0) system = FSConfig.makeLinuxAlphaSystem('atomic') system.cpu = cpu cpu.connectMemPorts(system.membus) diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index 967d6a2d2..bf94214fd 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -34,7 +34,7 @@ import FSConfig AlphaConsole.cpu = Parent.cpu[0] IntrControl.cpu = Parent.cpu[0] -cpus = [ TimingSimpleCPU() for i in xrange(2) ] +cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ] system = FSConfig.makeLinuxAlphaSystem('timing') system.cpu = cpus for c in cpus: diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py index b3fc9d105..2edf5ac32 100644 --- a/tests/configs/tsunami-simple-timing.py +++ b/tests/configs/tsunami-simple-timing.py @@ -31,7 +31,7 @@ from m5.objects import * m5.AddToPath('../configs/common') import FSConfig -cpu = TimingSimpleCPU() +cpu = TimingSimpleCPU(cpu_id=0) system = FSConfig.makeLinuxAlphaSystem('timing') system.cpu = cpu cpu.connectMemPorts(system.membus) diff --git a/tests/quick/00.hello.mp/test.py b/tests/quick/00.hello.mp/test.py new file mode 100644 index 000000000..91fbfb7ed --- /dev/null +++ b/tests/quick/00.hello.mp/test.py @@ -0,0 +1,44 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
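
[One convention runs through every config update in this changeset, single- and multi-core alike: each CPU is now constructed with an explicit, unique cpu_id. A minimal editorial restatement of the pattern, with the CPU classes interchangeable and taken from the diffs above:]

    # Multi-core configs: one unique cpu_id per CPU.
    cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ]
    # Single-CPU configs: cpu_id pinned to 0.
    cpu = TimingSimpleCPU(cpu_id=0)
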
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +# workload +benchmarks = [ + "tests/test-progs/hello/bin/alpha/linux/hello", "'hello'", + "tests/test-progs/hello/bin/alpha/linux/hello", "'hello'", + "tests/test-progs/hello/bin/alpha/linux/hello", "'hello'", + "tests/test-progs/hello/bin/alpha/linux/hello", "'hello'", + ] + +for i, cpu in zip(range(len(cpus)), root.system.cpu): + p = LiveProcess() + p.executable = benchmarks[i*2] + p.cmd = benchmarks[(i*2)+1] + root.system.cpu[i].workload = p + root.system.cpu[i].max_insts_all_threads = 10000000 +#root.system.cpu.workload = LiveProcess(cmd = 'hello', + # executable = binpath('hello')) diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini index c3a59fbce..86e688c3d 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini @@ -110,6 +110,7 @@ numROBEntries=192 numRobs=1 numThreads=1 predType=tournament +progress_interval=0 renameToDecodeDelay=1 renameToFetchDelay=1 renameToIEWDelay=2 @@ -384,20 +385,30 @@ mem_side=system.membus.port[1] [system.cpu.toL2Bus] type=Bus bus_id=0 +clock=1000 +width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side [system.cpu.workload] type=LiveProcess cmd=hello +egid=100 env= +euid=100 executable=tests/test-progs/hello/bin/alpha/linux/hello +gid=100 input=cin output=cout +pid=100 +ppid=99 system=system +uid=100 [system.membus] type=Bus bus_id=0 +clock=1000 +width=64 port=system.physmem.port system.cpu.l2cache.mem_side [system.physmem] @@ -409,6 +420,7 @@ port=system.membus.port[0] [trace] bufsize=0 +cycle=0 dump_on_exit=false file=cout flags= diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out index f491a3081..1b8e6d980 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out @@ -19,6 +19,8 @@ mem_mode=atomic [system.membus] type=Bus bus_id=0 +clock=1000 +width=64 [system.cpu.workload] type=LiveProcess @@ -28,6 +30,12 @@ input=cin output=cout env= system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 [system.cpu.dcache] type=BaseCache @@ -208,6 +216,7 @@ max_insts_any_thread=0 max_insts_all_threads=0 max_loads_any_thread=0 max_loads_all_threads=0 +progress_interval=0 cachePorts=200 decodeToFetchDelay=1 renameToFetchDelay=1 @@ -354,10 +363,13 @@ hit_latency=1 [system.cpu.toL2Bus] type=Bus bus_id=0 +clock=1000 +width=64 [trace] flags= start=0 +cycle=0 bufsize=0 file=cout dump_on_exit=false @@ -401,3 +413,6 @@ trace_system=client [debug] break_cycles= +[statsreset] +reset_cycle=0 + diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt index 5d4f9235a..59cda42d9 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt +++ 
b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt @@ -2,39 +2,39 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. global.BPredUnit.BTBHits 542 # Number of BTB hits -global.BPredUnit.BTBLookups 1938 # Number of BTB lookups +global.BPredUnit.BTBLookups 1936 # Number of BTB lookups global.BPredUnit.RASInCorrect 48 # Number of incorrect RAS predictions. global.BPredUnit.condIncorrect 420 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 1304 # Number of conditional branches predicted -global.BPredUnit.lookups 2256 # Number of BP lookups +global.BPredUnit.condPredicted 1302 # Number of conditional branches predicted +global.BPredUnit.lookups 2254 # Number of BP lookups global.BPredUnit.usedRAS 291 # Number of times the RAS was used to get a target. -host_inst_rate 41797 # Simulator instruction rate (inst/s) -host_mem_usage 160344 # Number of bytes of host memory used -host_seconds 0.13 # Real time elapsed on the host -host_tick_rate 50948 # Simulator tick rate (ticks/s) +host_inst_rate 46995 # Simulator instruction rate (inst/s) +host_mem_usage 160420 # Number of bytes of host memory used +host_seconds 0.12 # Real time elapsed on the host +host_tick_rate 57256 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 12 # Number of conflicting loads. memdepunit.memDep.conflictingStores 259 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 2050 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 2049 # Number of loads inserted to the mem dependence unit. memdepunit.memDep.insertedStores 1221 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5623 # Number of instructions simulated sim_seconds 0.000000 # Number of seconds simulated -sim_ticks 6870 # Number of ticks simulated +sim_ticks 6868 # Number of ticks simulated system.cpu.commit.COM:branches 862 # Number of branches committed system.cpu.commit.COM:bw_lim_events 74 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 6116 +system.cpu.commit.COM:committed_per_cycle.samples 6115 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 3908 6389.80% - 1 1064 1739.70% - 2 389 636.04% - 3 210 343.36% - 4 153 250.16% - 5 93 152.06% - 6 76 124.26% - 7 149 243.62% - 8 74 120.99% + 0 3908 6390.84% + 1 1063 1738.35% + 2 389 636.14% + 3 210 343.42% + 4 152 248.57% + 5 94 153.72% + 6 76 124.28% + 7 149 243.66% + 8 74 121.01% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -46,67 +46,67 @@ system.cpu.commit.COM:swp_count 0 # Nu system.cpu.commit.branchMispredicts 337 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 5640 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 17 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 4350 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 4342 # The number of squashed insts skipped by commit system.cpu.committedInsts 5623 # Number of Instructions Simulated system.cpu.committedInsts_total 5623 # Number of 
Instructions Simulated -system.cpu.cpi 1.221768 # CPI: Cycles Per Instruction -system.cpu.cpi_total 1.221768 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1538 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 3.072000 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2.240000 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1413 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 384 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.081274 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 125 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 25 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 224 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.065020 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 100 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 821 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 2.467742 # average WriteReq miss latency +system.cpu.cpi 1.221412 # CPI: Cycles Per Instruction +system.cpu.cpi_total 1.221412 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1536 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 3.038760 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2.235294 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1407 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 392 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.083984 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 129 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 27 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 228 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.066406 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 102 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 2.564246 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2.140845 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 635 # number of WriteReq hits +system.cpu.dcache.WriteReq_hits 633 # number of WriteReq hits system.cpu.dcache.WriteReq_miss_latency 459 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.226553 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 186 # number of WriteReq misses +system.cpu.dcache.WriteReq_miss_rate 0.220443 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 179 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 108 # number of WriteReq MSHR hits system.cpu.dcache.WriteReq_mshr_miss_latency 152 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.086480 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_miss_rate 0.087438 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 71 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets 0.800000 # average number of cycles each access was blocked 
-system.cpu.dcache.avg_refs 11.505618 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 11.791908 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 5 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 4 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2359 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 2.710611 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 2.198830 # average overall mshr miss latency -system.cpu.dcache.demand_hits 2048 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 843 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.131836 # miss rate for demand accesses -system.cpu.dcache.demand_misses 311 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 133 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 376 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.072488 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 171 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_accesses 2348 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 2.762987 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 2.196532 # average overall mshr miss latency +system.cpu.dcache.demand_hits 2040 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 851 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.131175 # miss rate for demand accesses +system.cpu.dcache.demand_misses 308 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 135 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 380 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.073680 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 173 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2359 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 2.710611 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 2.198830 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2348 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 2.762987 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 2.196532 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 2048 # number of overall hits -system.cpu.dcache.overall_miss_latency 843 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.131836 # miss rate for overall accesses -system.cpu.dcache.overall_misses 311 # number of overall misses 
-system.cpu.dcache.overall_mshr_hits 133 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 376 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.072488 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 171 # number of overall MSHR misses +system.cpu.dcache.overall_hits 2040 # number of overall hits +system.cpu.dcache.overall_miss_latency 851 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.131175 # miss rate for overall accesses +system.cpu.dcache.overall_misses 308 # number of overall misses +system.cpu.dcache.overall_mshr_hits 135 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 380 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.073680 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 173 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -119,43 +119,43 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.dcache.replacements 0 # number of replacements -system.cpu.dcache.sampled_refs 178 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 173 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 119.831029 # Cycle average of tags in use -system.cpu.dcache.total_refs 2048 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 114.960547 # Cycle average of tags in use +system.cpu.dcache.total_refs 2040 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.decode.DECODE:BlockedCycles 387 # Number of cycles decode is blocked system.cpu.decode.DECODE:BranchMispred 93 # Number of times decode detected a branch misprediction system.cpu.decode.DECODE:BranchResolved 185 # Number of times decode resolved a branch system.cpu.decode.DECODE:DecodedInsts 12349 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 3542 # Number of cycles decode is idle +system.cpu.decode.DECODE:IdleCycles 3541 # Number of cycles decode is idle system.cpu.decode.DECODE:RunCycles 2158 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 754 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashCycles 753 # Number of cycles decode is squashing system.cpu.decode.DECODE:SquashedInsts 286 # Number of squashed instructions handled by decode system.cpu.decode.DECODE:UnblockCycles 30 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 2256 # Number of branches that fetch encountered +system.cpu.fetch.Branches 2254 # Number of branches that fetch encountered system.cpu.fetch.CacheLines 1582 # Number of cache lines fetched -system.cpu.fetch.Cycles 3905 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.Cycles 3904 # Number of cycles fetch has run and was not squashing or blocked system.cpu.fetch.IcacheSquashes 148 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 13707 # Number of instructions fetch has processed +system.cpu.fetch.Insts 13699 # Number of instructions fetch has processed system.cpu.fetch.SquashCycles 456 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.328336 # Number of branch fetches per cycle +system.cpu.fetch.branchRate 0.328141 # Number of branch fetches per cycle system.cpu.fetch.icacheStallCycles 1582 # Number of cycles fetch is stalled on an Icache miss system.cpu.fetch.predictedBranches 833 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 1.994906 # Number of inst fetches per cycle +system.cpu.fetch.rate 1.994322 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 6871 +system.cpu.fetch.rateDist.samples 6869 system.cpu.fetch.rateDist.min_value 0 - 0 4549 6620.58% - 1 174 253.24% - 2 186 270.70% - 3 157 228.50% - 4 211 307.09% - 5 153 222.68% - 6 171 248.87% - 7 105 152.82% - 8 1165 1695.53% + 0 4548 6621.05% + 1 174 253.31% + 2 186 270.78% + 3 157 228.56% + 4 211 307.18% + 5 153 222.74% + 6 171 248.94% + 7 105 152.86% + 8 1164 1694.57% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist @@ -170,8 +170,8 @@ system.cpu.icache.ReadReq_mshr_hits 6 # nu system.cpu.icache.ReadReq_mshr_miss_latency 641 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.202908 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 321 # number of ReadReq MSHR misses -system.cpu.icache.avg_blocked_cycles_no_mshrs no value # average number of cycles each access was blocked -system.cpu.icache.avg_blocked_cycles_no_targets no value # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked system.cpu.icache.avg_refs 3.909657 # Average 
number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked @@ -218,38 +218,38 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 321 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 176.393247 # Cycle average of tags in use +system.cpu.icache.tagsinuse 176.439074 # Cycle average of tags in use system.cpu.icache.total_refs 1255 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.iew.EXEC:branches 1206 # Number of branches executed -system.cpu.iew.EXEC:insts 7969 # Number of executed instructions -system.cpu.iew.EXEC:loads 1610 # Number of load instructions executed system.cpu.iew.EXEC:nop 37 # number of nop insts executed -system.cpu.iew.EXEC:rate 1.159802 # Inst execution rate -system.cpu.iew.EXEC:refs 2599 # number of memory reference insts executed -system.cpu.iew.EXEC:squashedInsts 419 # Number of squashed instructions skipped in execute -system.cpu.iew.EXEC:stores 989 # Number of stores executed +system.cpu.iew.EXEC:rate 1.157374 # Inst execution rate +system.cpu.iew.EXEC:refs 2595 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 985 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed -system.cpu.iew.WB:consumers 5438 # num instructions consuming a value -system.cpu.iew.WB:count 7722 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.744575 # average fanout of values written-back +system.cpu.iew.WB:consumers 5409 # num instructions consuming a value +system.cpu.iew.WB:count 7670 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.744130 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 4049 # num instructions producing a value -system.cpu.iew.WB:rate 1.123854 # insts written-back per cycle -system.cpu.iew.WB:sent 7762 # cumulative count of insts sent to commit +system.cpu.iew.WB:producers 4025 # num instructions producing a value +system.cpu.iew.WB:rate 1.116611 # insts written-back per cycle +system.cpu.iew.WB:sent 7743 # cumulative count of insts sent to commit system.cpu.iew.branchMispredicts 393 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 4 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 2050 # Number of dispatched load instructions +system.cpu.iew.iewDispLoadInsts 2049 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 21 # Number of dispatched non-speculative instructions system.cpu.iew.iewDispSquashedInsts 272 # Number of squashed instructions skipped by dispatch system.cpu.iew.iewDispStoreInsts 1221 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 9990 # Number of instructions dispatched to IQ +system.cpu.iew.iewDispatchedInsts 9982 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 1610 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 409 # 
Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 7950 # Number of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 754 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 753 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 5 # Number of times an access to memory failed due to the cache being blocked @@ -257,1586 +257,19 @@ system.cpu.iew.lsq.thread.0.forwLoads 55 # Number of loads that had data forwarded from stores system.cpu.iew.lsq.thread.0.ignoredResponses 5 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu.iew.lsq.thread.0.memOrderViolation 41 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 1071 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedLoads 1070 # Number of loads squashed system.cpu.iew.lsq.thread.0.squashedStores 409 # Number of stores squashed system.cpu.iew.memOrderViolationEvents 41 # Number of memory order violations system.cpu.iew.predictedNotTakenIncorrect 296 # Number of branches that were predicted not taken incorrectly system.cpu.iew.predictedTakenIncorrect 97 # Number of branches that were predicted taken incorrectly -system.cpu.ipc 0.818486 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.818486 # IPC: Total IPC of All Threads
[~1,550 deleted reference-output lines elided here: the all-zero system.cpu.iq.IQ:residence:* and system.cpu.iq.ISSUE:*_delay histograms (samples 0 and every bucket 0 for each FU class), which this commit drops from m5stats.txt]
-system.cpu.iq.ISSUE:FU_type_0 8388 # Type of FU issued +system.cpu.ipc 0.818725 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.818725 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 8359 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist (null) 2 0.02% # Type of FU issued - IntAlu 5594 66.69% # Type of FU issued + IntAlu 5573 66.67% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.02% # Type of FU issued @@ -1845,13 +278,13 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1757 20.95% # Type of FU issued - MemWrite 1032 12.30% # Type of FU issued + MemRead 1757 21.02% # Type of FU issued + MemWrite 1024 12.25% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist system.cpu.iq.ISSUE:fu_busy_cnt 115 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.013710 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate 0.013758 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist (null) 0 0.00% # attempts to use FU when none available IntAlu 1 0.87% # attempts to use FU when none available @@ -1869,72 +302,72 @@ system.cpu.iq.ISSUE:fu_full.start_dist InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 6871 +system.cpu.iq.ISSUE:issued_per_cycle.samples 6869 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 3753 5462.09% - 1 894 1301.12% - 2 723 1052.25% - 3 614 893.61% - 4 451 656.38% - 5 279 406.05% - 6 104 151.36% - 7 41 59.67% - 8 12 17.46% + 0 3761 5475.32% + 1 891 1297.13% + 2 720 1048.19% + 3 617 898.24% + 4 445 647.84% + 5 278 404.72% + 6 104 151.40% +
+    7       41     59.69%
+    8       12     17.47%
 system.cpu.iq.ISSUE:issued_per_cycle.max_value 8
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate 1.220783 # Inst issue rate
-system.cpu.iq.iqInstsAdded 9932 # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued 8388 # Number of instructions issued
+system.cpu.iq.ISSUE:rate 1.216917 # Inst issue rate
+system.cpu.iq.iqInstsAdded 9924 # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued 8359 # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded 21 # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined 3990 # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsExamined 3985 # Number of squashed instructions iterated over during squash; mainly for profiling
 system.cpu.iq.iqSquashedInstsIssued 21 # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved 4 # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined 2486 # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined 2568 # Number of squashed operands that are examined and possibly removed from graph
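The updated issue-stage numbers stay self-consistent: ISSUE:rate is simply instructions issued divided by cycles. A quick sketch in the tree's own Python 2, using values copied from the "+" lines above (variable names are ours, not stat names):

# Issue rate = instructions issued / cycles simulated (values from above).
iq_insts_issued = 8359      # system.cpu.iq.iqInstsIssued
num_cycles      = 6869      # system.cpu.numCycles (== issued_per_cycle.samples)
rate = iq_insts_issued / float(num_cycles)
assert abs(rate - 1.216917) < 1e-6   # matches system.cpu.iq.ISSUE:rate
print "issue rate = %.6f" % rate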
-system.cpu.l2cache.ReadReq_accesses 499 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 2.042254 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_accesses 494 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 2.071138 # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1 # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency 1015 # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate 0.995992 # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses 497 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency 490 # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate 0.981964 # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses 490 # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_miss_latency 1019 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate 0.995951 # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses 492 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 492 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate 0.995951 # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses 492 # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs 0.004024 # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs 0.004065 # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
 system.cpu.l2cache.cache_copies 0 # number of cache copies performed
-system.cpu.l2cache.demand_accesses 499 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 2.042254 # average overall miss latency
+system.cpu.l2cache.demand_accesses 494 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 2.071138 # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency 1 # average overall mshr miss latency
 system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 1015 # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate 0.995992 # miss rate for demand accesses
-system.cpu.l2cache.demand_misses 497 # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency 1019 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate 0.995951 # miss rate for demand accesses
+system.cpu.l2cache.demand_misses 492 # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency 490 # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate 0.981964 # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses 490 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency 492 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate 0.995951 # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses 492 # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes 0 # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses 499 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 2.042254 # average overall miss latency
+system.cpu.l2cache.overall_accesses 494 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 2.071138 # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency 1 # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits 2 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 1015 # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate 0.995992 # miss rate for overall accesses
-system.cpu.l2cache.overall_misses 497 # number of overall misses
+system.cpu.l2cache.overall_miss_latency 1019 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate 0.995951 # miss rate for overall accesses
+system.cpu.l2cache.overall_misses 492 # number of overall misses
 system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency 490 # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate 0.981964 # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses 490 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency 492 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate 0.995951 # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses 492 # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
@@ -1947,22 +380,22 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements 0 # number of replacements
-system.cpu.l2cache.sampled_refs 497 # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs 492 # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse 295.773395 # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse 290.948901 # Cycle average of tags in use
 system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks 0 # number of writebacks
-system.cpu.numCycles 6871 # number of cpu cycles simulated
+system.cpu.numCycles 6869 # number of cpu cycles simulated
 system.cpu.rename.RENAME:BlockCycles 4 # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps 4051 # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles 3758 # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles 3757 # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents 62 # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:RenameLookups 14786 # Number of register rename lookups that rename has made
 system.cpu.rename.RENAME:RenamedInsts 11555 # Number of instructions processed by rename
 system.cpu.rename.RENAME:RenamedOperands 8634 # Number of destination operands rename has renamed
 system.cpu.rename.RENAME:RunCycles 1975 # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles 754 # Number of cycles rename is squashing
+system.cpu.rename.RENAME:SquashCycles 753 # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles 111 # Number of cycles rename is unblocking
 system.cpu.rename.RENAME:UndoneMaps 4583 # Number of HB maps that are undone due to squashing
 system.cpu.rename.RENAME:serializeStallCycles 269 # count of cycles rename stalled for serializing inst
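The L2 summary stats are derived from the raw counters, which is why so many lines move together: miss_rate = misses / accesses, avg_miss_latency = miss_latency / misses, and avg_refs = total_refs / sampled_refs. A Python 2 check with the new o3-timing values (variable names ours):

accesses     = 494      # ReadReq_accesses
misses       = 492      # ReadReq_misses
miss_cycles  = 1019     # ReadReq_miss_latency
total_refs   = 2        # total_refs
sampled_refs = 492      # sampled_refs
print "miss rate        = %.6f" % (misses / float(accesses))          # 0.995951
print "avg miss latency = %.6f" % (miss_cycles / float(misses))       # 2.071138
print "avg refs         = %.6f" % (total_refs / float(sampled_refs))  # 0.004065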
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
index 8893caac8..558105896 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
@@ -1,3 +1,12 @@
 warn: Entering event queue @ 0.  Starting simulation...
 warn: cycle 0: fault (page_table_fault) detected @ PC 0x000000
 warn: Increasing stack 0x11ff92000:0x11ff9b000 to 0x11ff90000:0x11ff9b000 because of access to 0x11ff91ff0
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
+warn: Default fetch doesn't update it's state from a functional call.
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index fbb329a2f..f2a1151c4 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled Sep  5 2006 15:28:48
-M5 started Tue Sep  5 15:42:12 2006
+M5 compiled Oct  8 2006 20:54:51
+M5 started Sun Oct  8 20:55:10 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
-Exiting @ tick 6870 because target called exit()
+Exiting @ tick 6868 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini
index f84372165..b8aba735a 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=AtomicSimpleCPU
 children=workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.physmem
+progress_interval=0
 simulate_stalls=false
 system=system
 width=1
@@ -74,15 +76,23 @@ icache_port=system.membus.port[1]
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/alpha/linux/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
 [system.physmem]
@@ -94,6 +104,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out
index a3621a08a..71a43d484 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 [system.cpu.workload]
 type=LiveProcess
@@ -28,6 +30,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
 [system.cpu]
 type=AtomicSimpleCPU
@@ -35,8 +43,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.physmem
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -48,6 +58,7 @@ simulate_stalls=false
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -91,3 +102,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
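The config.ini/config.out churn above is new SimObject parameters being dumped: cpu_id and progress_interval on CPUs, clock and width on Bus, POSIX identity fields (uid/euid/gid/egid/pid/ppid) on LiveProcess, plus the trace cycle and [statsreset] entries. A hedged sketch of setting these explicitly in a config script; the parameter names come from the dumps above, while the surrounding script structure is assumed:

# Assumed minimal m5 config fragment; parameter names and values match the
# fields newly dumped in config.ini above.
from m5.objects import *

cpu = AtomicSimpleCPU(clock='1')
cpu.cpu_id = 0                # CPUs are now numbered explicitly
cpu.progress_interval = 0     # periodic progress reports (0 = disabled)

membus = Bus(bus_id=0)
membus.clock = 1000           # buses now carry a clock...
membus.width = 64             # ...and a data width

cpu.workload = LiveProcess(cmd='hello',
        executable='tests/test-progs/hello/bin/alpha/linux/hello',
        uid=100, euid=100, gid=100, egid=100,  # new POSIX identity params
        pid=100, ppid=99)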
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt
index e3cd05fb0..875e55644 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 74000 # Simulator instruction rate (inst/s)
-host_mem_usage 148088 # Number of bytes of host memory used
-host_seconds 0.08 # Real time elapsed on the host
-host_tick_rate 73591 # Simulator tick rate (ticks/s)
+host_inst_rate 172802 # Simulator instruction rate (inst/s)
+host_mem_usage 148116 # Number of bytes of host memory used
+host_seconds 0.03 # Real time elapsed on the host
+host_tick_rate 170614 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 5642 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout
index 17eea9aed..59f571aaf 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled Sep  5 2006 15:28:48
-M5 started Tue Sep  5 15:42:14 2006
+M5 compiled Oct  8 2006 14:00:39
+M5 started Sun Oct  8 14:00:50 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/simple-atomic tests/run.py quick/00.hello/alpha/linux/simple-atomic
 Exiting @ tick 5641 because target called exit()
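host_inst_rate and host_tick_rate are host-side throughput (sim_insts or sim_ticks per wall-clock second); host_seconds is printed rounded to two decimals, so the rates cannot be recomputed exactly from the displayed value. A rough Python 2 back-calculation from the new numbers:

sim_insts = 5642
host_inst_rate = 172802                       # reported above
# Recover the unrounded wall-clock time from the reported rate:
host_seconds = sim_insts / float(host_inst_rate)
print "host_seconds ~ %.4f" % host_seconds    # ~0.0327, displayed as 0.03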
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini
index 80d2a27e1..f8e1f1bb0 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=TimingSimpleCPU
 children=dcache icache l2cache toL2Bus workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.cpu.dcache
+progress_interval=0
 system=system
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
@@ -192,20 +194,30 @@ mem_side=system.membus.port[1]
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/alpha/linux/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.l2cache.mem_side
 [system.physmem]
@@ -217,6 +229,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out
index 09d8f0c22..2ab7c0150 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 [system.cpu.dcache]
 type=BaseCache
@@ -67,6 +69,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
 [system.cpu]
 type=TimingSimpleCPU
@@ -74,8 +82,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.cpu.dcache
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -87,6 +97,8 @@ function_trace_start=0
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 [system.cpu.icache]
 type=BaseCache
@@ -169,6 +181,7 @@ hit_latency=1
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -212,3 +225,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt
index fe2cd43a5..6914938e5 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt
@@ -1,67 +1,67 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 113478 # Simulator instruction rate (inst/s)
-host_mem_usage 159608 # Number of bytes of host memory used
-host_seconds 0.05 # Real time elapsed on the host
-host_tick_rate 165749 # Simulator tick rate (ticks/s)
+host_inst_rate 152920 # Simulator instruction rate (inst/s)
+host_mem_usage 166272 # Number of bytes of host memory used
+host_seconds 0.04 # Real time elapsed on the host
+host_tick_rate 221766 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 5642 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
-sim_ticks 8312 # Number of ticks simulated
+sim_ticks 8316 # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses 981 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses 979 # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency 3 # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 891 # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency 270 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.091743 # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses 90 # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency 180 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.091743 # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses 90 # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses 821 # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 2.737500 # average WriteReq miss latency
+system.cpu.dcache.ReadReq_hits 887 # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency 276 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate 0.093973 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses 92 # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency 184 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.093973 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses 92 # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 3 # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2 # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits 741 # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits 739 # number of WriteReq hits
 system.cpu.dcache.WriteReq_miss_latency 219 # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate 0.097442 # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses 80 # number of WriteReq misses
+system.cpu.dcache.WriteReq_miss_rate 0.089901 # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses 73 # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_miss_latency 146 # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate 0.088916 # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses 73 # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 9.600000 # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs 9.854545 # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
 system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 1802 # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 2.876471 # average overall miss latency
+system.cpu.dcache.demand_accesses 1791 # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 3 # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency 2 # average overall mshr miss latency
-system.cpu.dcache.demand_hits 1632 # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency 489 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.094340 # miss rate for demand accesses
-system.cpu.dcache.demand_misses 170 # number of demand (read+write) misses
+system.cpu.dcache.demand_hits 1626 # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency 495 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate 0.092127 # miss rate for demand accesses
+system.cpu.dcache.demand_misses 165 # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 326 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.090455 # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses 163 # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency 330 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate 0.092127 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses 165 # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes 0 # number of fast writes performed
 system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 1802 # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 2.876471 # average overall miss latency
+system.cpu.dcache.overall_accesses 1791 # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 3 # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency 2 # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 1632 # number of overall hits
-system.cpu.dcache.overall_miss_latency 489 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.094340 # miss rate for overall accesses
-system.cpu.dcache.overall_misses 170 # number of overall misses
+system.cpu.dcache.overall_hits 1626 # number of overall hits
+system.cpu.dcache.overall_miss_latency 495 # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate 0.092127 # miss rate for overall accesses
+system.cpu.dcache.overall_misses 165 # number of overall misses
 system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 326 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.090455 # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses 163 # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency 330 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate 0.092127 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses 165 # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
@@ -74,10 +74,10 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0
 system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.dcache.replacements 0 # number of replacements
-system.cpu.dcache.sampled_refs 170 # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs 165 # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse 112.055094 # Cycle average of tags in use
-system.cpu.dcache.total_refs 1632 # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse 107.125526 # Cycle average of tags in use
+system.cpu.dcache.total_refs 1626 # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks 0 # number of writebacks
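The simple-timing dcache changes follow the same derived-stat pattern: demand_miss_rate = 165/1791, demand_avg_miss_latency = 495/165, avg_refs = total_refs/sampled_refs = 1626/165. In Python 2 (variable names ours, values from the "+" lines above):

demand_accesses    = 1791
demand_misses      = 165
demand_miss_cycles = 495
total_refs, sampled_refs = 1626, 165
print "%.6f" % (demand_misses / float(demand_accesses))     # 0.092127
print "%.6f" % (demand_miss_cycles / float(demand_misses))  # 3.000000
print "%.6f" % (total_refs / float(sampled_refs))           # 9.854545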
 system.cpu.icache.ReadReq_accesses 5643 # number of ReadReq accesses(hits+misses)
@@ -138,55 +138,55 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0
 system.cpu.icache.replacements 0 # number of replacements
 system.cpu.icache.sampled_refs 277 # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse 133.267292 # Cycle average of tags in use
+system.cpu.icache.tagsinuse 133.213539 # Cycle average of tags in use
 system.cpu.icache.total_refs 5366 # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks 0 # number of writebacks
 system.cpu.idle_fraction 0 # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses 447 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 1.968610 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_accesses 442 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 2 # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1 # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency 878 # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate 0.997763 # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses 446 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency 439 # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate 0.982103 # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses 439 # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_miss_latency 882 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate 0.997738 # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses 441 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 441 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate 0.997738 # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses 441 # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs 0.002242 # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs 0.002268 # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
 system.cpu.l2cache.cache_copies 0 # number of cache copies performed
-system.cpu.l2cache.demand_accesses 447 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 1.968610 # average overall miss latency
+system.cpu.l2cache.demand_accesses 442 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 2 # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency 1 # average overall mshr miss latency
 system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 878 # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate 0.997763 # miss rate for demand accesses
-system.cpu.l2cache.demand_misses 446 # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency 882 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate 0.997738 # miss rate for demand accesses
+system.cpu.l2cache.demand_misses 441 # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency 439 # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate 0.982103 # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses 439 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency 441 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate 0.997738 # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses 441 # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes 0 # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses 447 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 1.968610 # average overall miss latency
+system.cpu.l2cache.overall_accesses 442 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 2 # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency 1 # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits 1 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 878 # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate 0.997763 # miss rate for overall accesses
-system.cpu.l2cache.overall_misses 446 # number of overall misses
+system.cpu.l2cache.overall_miss_latency 882 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate 0.997738 # miss rate for overall accesses
+system.cpu.l2cache.overall_misses 441 # number of overall misses
 system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency 439 # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate 0.982103 # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses 439 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency 441 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate 0.997738 # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses 441 # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
@@ -199,14 +199,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements 0 # number of replacements
-system.cpu.l2cache.sampled_refs 446 # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs 441 # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse 245.259112 # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse 240.276061 # Cycle average of tags in use
 system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks 0 # number of writebacks
 system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
-system.cpu.numCycles 0 # number of cpu cycles simulated
+system.cpu.numCycles 8316 # number of cpu cycles simulated
 system.cpu.num_insts 5642 # Number of instructions executed
 system.cpu.num_refs 1792 # Number of memory references
 system.cpu.workload.PROG:num_syscalls 17 # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout
index 7104aa0ce..423c0b115 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled Sep  5 2006 15:28:48
-M5 started Tue Sep  5 15:42:15 2006
+M5 compiled Oct 10 2006 01:56:36
+M5 started Tue Oct 10 01:57:04 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/simple-timing tests/run.py quick/00.hello/alpha/linux/simple-timing
-Exiting @ tick 8312 because target called exit()
+Exiting @ tick 8316 because target called exit()
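The numCycles change (0 to 8316) is the substantive fix in this file: with the TimingSimpleCPU clocked at 1 tick per cycle (clock=1 in config.ini above), numCycles now agrees with sim_ticks and with the exit tick in stdout. A trivial Python 2 consistency check:

sim_ticks  = 8316   # m5stats.txt above
num_cycles = 8316   # system.cpu.numCycles, was 0 before the fix
exit_tick  = 8316   # "Exiting @ tick 8316" in stdout
assert sim_ticks == num_cycles == exit_tick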
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index 790ae6ab3..e15dd47b7 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -110,6 +110,7 @@ numROBEntries=192
 numRobs=1
 numThreads=1
 predType=tournament
+progress_interval=0
 renameToDecodeDelay=1
 renameToFetchDelay=1
 renameToIEWDelay=2
@@ -384,20 +385,30 @@ mem_side=system.membus.port[1]
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/alpha/tru64/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.l2cache.mem_side
 [system.physmem]
@@ -409,6 +420,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
index 474ea3523..a57dbacf3 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 [system.cpu.workload]
 type=LiveProcess
@@ -28,6 +30,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
 [system.cpu.dcache]
 type=BaseCache
@@ -208,6 +216,7 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 cachePorts=200
 decodeToFetchDelay=1
 renameToFetchDelay=1
@@ -354,10 +363,13 @@ hit_latency=1
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -401,3 +413,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index db582e731..41348bbfb 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -8,10 +8,10 @@ global.BPredUnit.condIncorrect 222 # Nu
 global.BPredUnit.condPredicted 441 # Number of conditional branches predicted
 global.BPredUnit.lookups 888 # Number of BP lookups
 global.BPredUnit.usedRAS 160 # Number of times the RAS was used to get a target.
-host_inst_rate 26468 # Simulator instruction rate (inst/s)
-host_mem_usage 159864 # Number of bytes of host memory used
+host_inst_rate 26386 # Simulator instruction rate (inst/s)
+host_mem_usage 159884 # Number of bytes of host memory used
 host_seconds 0.09 # Real time elapsed on the host
-host_tick_rate 31894 # Simulator tick rate (ticks/s)
+host_tick_rate 31792 # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads 9 # Number of conflicting loads.
 memdepunit.memDep.conflictingStores 7 # Number of conflicting stores.
 memdepunit.memDep.insertedLoads 675 # Number of loads inserted to the mem dependence unit.
@@ -51,16 +51,16 @@ system.cpu.committedInsts 2387 # Nu
 system.cpu.committedInsts_total 2387 # Number of Instructions Simulated
 system.cpu.cpi 1.209049 # CPI: Cycles Per Instruction
 system.cpu.cpi_total 1.209049 # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses 535 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses 534 # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency 3 # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 470 # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits 469 # number of ReadReq hits
 system.cpu.dcache.ReadReq_miss_latency 195 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.121495 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate 0.121723 # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses 65 # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits 4 # number of ReadReq MSHR hits
 system.cpu.dcache.ReadReq_mshr_miss_latency 122 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.114019 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.114232 # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses 61 # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency 3.017241 # average WriteReq miss latency
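cpi and ipc are reciprocals, and together with committedInsts they imply the cycle count even though it is not printed in this hunk. A Python 2 check (values from the stats above; the derived cycle count is an inference, not a printed stat):

committed = 2387       # system.cpu.committedInsts
cpi = 1.209049         # system.cpu.cpi
ipc = 0.827096         # system.cpu.ipc, further down in this file
assert abs(cpi * ipc - 1.0) < 1e-5           # reciprocal pair
print "cycles ~ %.0f" % (committed * cpi)    # ~2886 (inferred)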
@@ -75,37 +75,37 @@ system.cpu.dcache.WriteReq_mshr_miss_rate 0.081633 # m
 system.cpu.dcache.WriteReq_mshr_misses 24 # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets 1.500000 # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 8.305882 # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs 8.294118 # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets 2 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets 3 # number of cycles access was blocked
 system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 829 # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses 828 # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency 3.008130 # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency 2.058824 # average overall mshr miss latency
-system.cpu.dcache.demand_hits 706 # number of demand (read+write) hits
+system.cpu.dcache.demand_hits 705 # number of demand (read+write) hits
 system.cpu.dcache.demand_miss_latency 370 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.148372 # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate 0.148551 # miss rate for demand accesses
 system.cpu.dcache.demand_misses 123 # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits 38 # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_miss_latency 175 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.102533 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate 0.102657 # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses 85 # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes 0 # number of fast writes performed
 system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 829 # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses 828 # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency 3.008130 # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency 2.058824 # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 706 # number of overall hits
+system.cpu.dcache.overall_hits 705 # number of overall hits
 system.cpu.dcache.overall_miss_latency 370 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.148372 # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate 0.148551 # miss rate for overall accesses
 system.cpu.dcache.overall_misses 123 # number of overall misses
 system.cpu.dcache.overall_mshr_hits 38 # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_miss_latency 175 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.102533 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate 0.102657 # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses 85 # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
@@ -122,7 +122,7 @@ system.cpu.dcache.replacements 0 # nu
 system.cpu.dcache.sampled_refs 85 # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.tagsinuse 54.161413 # Cycle average of tags in use
-system.cpu.dcache.total_refs 706 # Total number of references to valid blocks.
+system.cpu.dcache.total_refs 705 # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks 0 # number of writebacks
@@ -222,30 +222,30 @@ system.cpu.icache.tagsinuse 115.538968 # Cy
 system.cpu.icache.total_refs 550 # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks 0 # number of writebacks
-system.cpu.iew.EXEC:branches 533 # Number of branches executed
-system.cpu.iew.EXEC:insts 3123 # Number of executed instructions
-system.cpu.iew.EXEC:loads 578 # Number of load instructions executed
+system.cpu.iew.EXEC:branches 532 # Number of branches executed
 system.cpu.iew.EXEC:nop 247 # number of nop insts executed
-system.cpu.iew.EXEC:rate 1.081746 # Inst execution rate
-system.cpu.iew.EXEC:refs 914 # number of memory reference insts executed
-system.cpu.iew.EXEC:squashedInsts 148 # Number of squashed instructions skipped in execute
+system.cpu.iew.EXEC:rate 1.078628 # Inst execution rate
+system.cpu.iew.EXEC:refs 910 # number of memory reference insts executed
 system.cpu.iew.EXEC:stores 336 # Number of stores executed
 system.cpu.iew.EXEC:swp 0 # number of swp insts executed
-system.cpu.iew.WB:consumers 1801 # num instructions consuming a value
-system.cpu.iew.WB:count 3070 # cumulative count of insts written-back
-system.cpu.iew.WB:fanout 0.791227 # average fanout of values written-back
+system.cpu.iew.WB:consumers 1788 # num instructions consuming a value
+system.cpu.iew.WB:count 3053 # cumulative count of insts written-back
+system.cpu.iew.WB:fanout 0.790828 # average fanout of values written-back
 system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers 1425 # num instructions producing a value
-system.cpu.iew.WB:rate 1.063388 # insts written-back per cycle
-system.cpu.iew.WB:sent 3076 # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts 159 # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:producers 1414 # num instructions producing a value
+system.cpu.iew.WB:rate 1.057499 # insts written-back per cycle
+system.cpu.iew.WB:sent 3067 # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts 158 # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles 0 # Number of cycles IEW is blocking
 system.cpu.iew.iewDispLoadInsts 675 # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts 7 # Number of dispatched non-speculative instructions
 system.cpu.iew.iewDispSquashedInsts 127 # Number of squashed instructions skipped by dispatch
 system.cpu.iew.iewDispStoreInsts 369 # Number of dispatched store instructions
 system.cpu.iew.iewDispatchedInsts 3835 # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts 574 # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts 143 # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts 3114 # Number of executed instructions
 system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall
@@ -257,1586 +257,19 @@ system.cpu.iew.lsq.thread.0.forwLoads 30 # Nu
 system.cpu.iew.lsq.thread.0.ignoredResponses 3 # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address
+system.cpu.iew.lsq.thread.0.memOrderViolation 11 # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled
 system.cpu.iew.lsq.thread.0.squashedLoads 260 # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores 75 # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents 11 # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect 106 # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect 105 # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect 53 # Number of branches that were predicted taken incorrectly
 system.cpu.ipc 0.827096 # IPC: Instructions Per Cycle
 system.cpu.ipc_total 0.827096 # IPC: Total IPC of All Threads
-  [~1,570 removed lines, per the hunk header above: all-zero
-   system.cpu.iq.IQ:residence:* ("cycles from dispatch to issue") and
-   system.cpu.iq.ISSUE:*_delay ("cycles from operands ready to issue")
-   distributions, one per FU type ((null), IntAlu, IntMult, IntDiv,
-   FloatAdd, FloatCmp, FloatCvt, FloatMult, FloatDiv, FloatSqrt,
-   MemRead, MemWrite, IprAccess, InstPrefetch), each with samples 0,
-   min_value/max_value 0, and buckets 0-98 all zero]
62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:FloatCvt_delay.max_value 0 -system.cpu.iq.ISSUE:FloatCvt_delay.end_dist - -system.cpu.iq.ISSUE:FloatMult_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:FloatMult_delay.samples 0 -system.cpu.iq.ISSUE:FloatMult_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:FloatMult_delay.max_value 0 -system.cpu.iq.ISSUE:FloatMult_delay.end_dist - -system.cpu.iq.ISSUE:FloatDiv_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:FloatDiv_delay.samples 0 -system.cpu.iq.ISSUE:FloatDiv_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:FloatDiv_delay.max_value 0 -system.cpu.iq.ISSUE:FloatDiv_delay.end_dist - -system.cpu.iq.ISSUE:FloatSqrt_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:FloatSqrt_delay.samples 0 -system.cpu.iq.ISSUE:FloatSqrt_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:FloatSqrt_delay.max_value 0 -system.cpu.iq.ISSUE:FloatSqrt_delay.end_dist - -system.cpu.iq.ISSUE:MemRead_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:MemRead_delay.samples 0 -system.cpu.iq.ISSUE:MemRead_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:MemRead_delay.max_value 0 -system.cpu.iq.ISSUE:MemRead_delay.end_dist - -system.cpu.iq.ISSUE:MemWrite_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:MemWrite_delay.samples 0 -system.cpu.iq.ISSUE:MemWrite_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:MemWrite_delay.max_value 0 -system.cpu.iq.ISSUE:MemWrite_delay.end_dist - -system.cpu.iq.ISSUE:IprAccess_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:IprAccess_delay.samples 0 -system.cpu.iq.ISSUE:IprAccess_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 
14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:IprAccess_delay.max_value 0 -system.cpu.iq.ISSUE:IprAccess_delay.end_dist - -system.cpu.iq.ISSUE:InstPrefetch_delay.start_dist # cycles from operands ready to issue -system.cpu.iq.ISSUE:InstPrefetch_delay.samples 0 -system.cpu.iq.ISSUE:InstPrefetch_delay.min_value 0 - 0 0 - 2 0 - 4 0 - 6 0 - 8 0 - 10 0 - 12 0 - 14 0 - 16 0 - 18 0 - 20 0 - 22 0 - 24 0 - 26 0 - 28 0 - 30 0 - 32 0 - 34 0 - 36 0 - 38 0 - 40 0 - 42 0 - 44 0 - 46 0 - 48 0 - 50 0 - 52 0 - 54 0 - 56 0 - 58 0 - 60 0 - 62 0 - 64 0 - 66 0 - 68 0 - 70 0 - 72 0 - 74 0 - 76 0 - 78 0 - 80 0 - 82 0 - 84 0 - 86 0 - 88 0 - 90 0 - 92 0 - 94 0 - 96 0 - 98 0 -system.cpu.iq.ISSUE:InstPrefetch_delay.max_value 0 -system.cpu.iq.ISSUE:InstPrefetch_delay.end_dist - -system.cpu.iq.ISSUE:FU_type_0 3271 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0 3257 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist (null) 0 0.00% # Type of FU issued - IntAlu 2317 70.83% # Type of FU issued + IntAlu 2308 70.86% # Type of FU issued IntMult 1 0.03% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 0 0.00% # Type of FU issued @@ -1845,13 +278,13 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 609 18.62% # Type of FU issued - MemWrite 344 10.52% # Type of FU issued + MemRead 605 18.58% # Type of FU issued + MemWrite 343 10.53% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist system.cpu.iq.ISSUE:fu_busy_cnt 40 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.012229 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate 0.012281 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist (null) 0 0.00% # attempts to use FU when none available IntAlu 5 12.50% # attempts to use FU when none available @@ -1871,11 +304,11 @@ system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle.samples 2887 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 1603 5552.48% - 1 434 1503.29% - 2 301 1042.60% - 3 220 762.04% - 4 167 578.46% + 0 1607 5566.33% + 1 435 1506.75% + 2 298 1032.21% + 3 221 765.50% + 4 164 568.06% 5 94 325.60% 6 46 159.33% 7 15 51.96% @@ -1883,14 +316,14 @@ system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 1.133010 # Inst issue rate +system.cpu.iq.ISSUE:rate 1.128161 # Inst issue rate system.cpu.iq.iqInstsAdded 3581 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 3271 # Number of instructions issued +system.cpu.iq.iqInstsIssued 3257 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 7 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 1067 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsExamined 1088 # Number of squashed instructions iterated over during squash; mainly for profiling 
 system.cpu.iq.iqSquashedInstsIssued 1 # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved 3 # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined 477 # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined 503 # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses 274 # number of ReadReq accesses(hits+misses)
 system.cpu.l2cache.ReadReq_avg_miss_latency 2.018248 # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1 # average ReadReq mshr miss latency
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index 708b9587a..c51631489 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved

-M5 compiled Sep 5 2006 15:28:48
-M5 started Tue Sep 5 15:42:16 2006
+M5 compiled Oct 8 2006 14:00:39
+M5 started Sun Oct 8 14:00:52 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
 Exiting @ tick 2886 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini
index 1ec052afb..60783267b 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=AtomicSimpleCPU
 children=workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.physmem
+progress_interval=0
 simulate_stalls=false
 system=system
 width=1
@@ -74,15 +76,23 @@ icache_port=system.membus.port[1]
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/alpha/tru64/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100

 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port

 [system.physmem]
@@ -94,6 +104,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out
index ae24ae2a8..c8733b8f7 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64

 [system.cpu.workload]
 type=LiveProcess
@@ -28,6 +30,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99

 [system.cpu]
 type=AtomicSimpleCPU
@@ -35,8 +43,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.physmem
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -48,6 +58,7 @@ simulate_stalls=false
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -91,3 +102,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt
index b4747f1f4..e3f845135 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 46556 # Simulator instruction rate (inst/s)
-host_mem_usage 147672 # Number of bytes of host memory used
-host_seconds 0.06 # Real time elapsed on the host
-host_tick_rate 46204 # Simulator tick rate (ticks/s)
+host_inst_rate 60702 # Simulator instruction rate (inst/s)
+host_mem_usage 147692 # Number of bytes of host memory used
+host_seconds 0.04 # Real time elapsed on the host
+host_tick_rate 60102 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 2578 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout
index 438e330f5..2ee4e0a08 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved

-M5 compiled Sep 5 2006 15:28:48
-M5 started Tue Sep 5 15:42:18 2006
+M5 compiled Oct 8 2006 14:00:39
+M5 started Sun Oct 8 14:00:54 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/simple-atomic tests/run.py quick/00.hello/alpha/tru64/simple-atomic
 Exiting @ tick 2577 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini
index e833d841e..f32654f76 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=TimingSimpleCPU
 children=dcache icache l2cache toL2Bus workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.cpu.dcache
+progress_interval=0
 system=system
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
@@ -192,20 +194,30 @@ mem_side=system.membus.port[1]
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side

 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/alpha/tru64/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100

 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.l2cache.mem_side

 [system.physmem]
@@ -217,6 +229,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out
index 1914b47e7..c45e587d9 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 [system.cpu.dcache]
 type=BaseCache
@@ -67,6 +69,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99

 [system.cpu]
 type=TimingSimpleCPU
@@ -74,8 +82,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.cpu.dcache
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -87,6 +97,8 @@ function_trace_start=0
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64

 [system.cpu.icache]
 type=BaseCache
@@ -169,6 +181,7 @@ hit_latency=1
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -212,3 +225,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt
index 47bcc1b3c..27b01a108 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt
@@ -1,22 +1,22 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 73626 # Simulator instruction rate (inst/s)
-host_mem_usage 159128 # Number of bytes of host memory used
-host_seconds 0.04 # Real time elapsed on the host
-host_tick_rate 106590 # Simulator tick rate (ticks/s)
+host_inst_rate 120829 # Simulator instruction rate (inst/s)
+host_mem_usage 165792 # Number of bytes of host memory used
+host_seconds 0.02 # Real time elapsed on the host
+host_tick_rate 168699 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 2578 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
 sim_ticks 3777 # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses 416 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses 415 # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency 3 # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 361 # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits 360 # number of ReadReq hits
 system.cpu.dcache.ReadReq_miss_latency 165 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.132212 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate 0.132530 # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses 55 # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_miss_latency 110 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.132212 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.132530 # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses 55 # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency 3 # average WriteReq miss latency
@@ -30,37 +30,37 @@ system.cpu.dcache.WriteReq_mshr_miss_rate 0.091837 # m
 system.cpu.dcache.WriteReq_mshr_misses 27 # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 7.658537 # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs 7.646341 # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
 system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 710 # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses 709 # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency 3 # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency 2 # average overall mshr miss latency
-system.cpu.dcache.demand_hits 628 # number of demand (read+write) hits
+system.cpu.dcache.demand_hits 627 # number of demand (read+write) hits
 system.cpu.dcache.demand_miss_latency 246 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.115493 # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate 0.115656 # miss rate for demand accesses
 system.cpu.dcache.demand_misses 82 # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_miss_latency 164 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.115493 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate 0.115656 # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses 82 # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes 0 # number of fast writes performed
 system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 710 # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses 709 # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency 3 # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency 2 # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 628 # number of overall hits
+system.cpu.dcache.overall_hits 627 # number of overall hits
 system.cpu.dcache.overall_miss_latency 246 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.115493 # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate 0.115656 # miss rate for overall accesses
 system.cpu.dcache.overall_misses 82 # number of overall misses
 system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_miss_latency 164 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.115493 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate 0.115656 # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses 82 # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
@@ -77,7 +77,7 @@ system.cpu.dcache.replacements 0 # nu
 system.cpu.dcache.sampled_refs 82 # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.tagsinuse 53.009529 # Cycle average of tags in use
-system.cpu.dcache.total_refs 628 # Total number of references to valid blocks.
+system.cpu.dcache.total_refs 627 # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks 0 # number of writebacks
 system.cpu.icache.ReadReq_accesses 2579 # number of ReadReq accesses(hits+misses)
@@ -205,7 +205,7 @@ system.cpu.l2cache.total_refs 0 # To
 system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks 0 # number of writebacks
 system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
-system.cpu.numCycles 0 # number of cpu cycles simulated
+system.cpu.numCycles 3777 # number of cpu cycles simulated
 system.cpu.num_insts 2578 # Number of instructions executed
 system.cpu.num_refs 710 # Number of memory references
 system.cpu.workload.PROG:num_syscalls 4 # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout
index 4a02e57f0..1beab6f4b 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved

-M5 compiled Sep 5 2006 15:28:48
-M5 started Tue Sep 5 15:42:18 2006
+M5 compiled Oct 10 2006 01:56:36
+M5 started Tue Oct 10 01:57:11 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/simple-timing tests/run.py quick/00.hello/alpha/tru64/simple-timing
 Exiting @ tick 3777 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
index 2c82b8c1a..59cadaa12 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=AtomicSimpleCPU
 children=workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.physmem
+progress_interval=0
 simulate_stalls=false
 system=system
 width=1
@@ -74,15 +76,23 @@ icache_port=system.membus.port[1]
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/mips/linux/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100

 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port

 [system.physmem]
@@ -94,6 +104,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out
index 8678c0d97..064f467da 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64

 [system.cpu.workload]
 type=LiveProcess
@@ -28,6 +30,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
 [system.cpu]
 type=AtomicSimpleCPU
@@ -35,8 +43,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.physmem
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -48,6 +58,7 @@ simulate_stalls=false
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -91,3 +102,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt
index b70a6ee17..3b2a2730b 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 90956 # Simulator instruction rate (inst/s)
-host_mem_usage 147380 # Number of bytes of host memory used
-host_seconds 0.06 # Real time elapsed on the host
-host_tick_rate 90353 # Simulator tick rate (ticks/s)
+host_inst_rate 52255 # Simulator instruction rate (inst/s)
+host_mem_usage 148024 # Number of bytes of host memory used
+host_seconds 0.11 # Real time elapsed on the host
+host_tick_rate 52038 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 5657 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout
index f5b9c8fd7..600b178b3 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved

-M5 compiled Sep 5 2006 15:37:09
-M5 started Tue Sep 5 15:46:32 2006
+M5 compiled Oct 9 2006 19:28:25
+M5 started Mon Oct 9 19:28:56 2006
 M5 executing on zizzer.eecs.umich.edu
-command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic tests/run.py quick/00.hello/mips/linux/simple-atomic
+command line: build/MIPS_SE/m5.debug -d build/MIPS_SE/tests/debug/quick/00.hello/mips/linux/simple-atomic tests/run.py quick/00.hello/mips/linux/simple-atomic
 Exiting @ tick 5656 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
index 040735f2c..8e1bb0388 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=TimingSimpleCPU
 children=dcache icache l2cache toL2Bus workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.cpu.dcache
+progress_interval=0
 system=system
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
@@ -192,20 +194,30 @@ mem_side=system.membus.port[1]
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side

 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/mips/linux/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100

 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64
 port=system.physmem.port system.cpu.l2cache.mem_side

 [system.physmem]
@@ -217,6 +229,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out
index a7270a97e..d683d2355 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out
@@ -19,6 +19,8 @@ mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
+clock=1000
+width=64

 [system.cpu.dcache]
 type=BaseCache
@@ -67,6 +69,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99

 [system.cpu]
 type=TimingSimpleCPU
@@ -74,8 +82,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.cpu.dcache
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -87,6 +97,8 @@ function_trace_start=0
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
+clock=1000
+width=64

 [system.cpu.icache]
 type=BaseCache
@@ -169,6 +181,7 @@ hit_latency=1
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -212,3 +225,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt
index 5d054b950..ab86ba509 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt
@@ -1,67 +1,67 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 129834 # Simulator instruction rate (inst/s)
-host_mem_usage 158964 # Number of bytes of host memory used
-host_seconds 0.04 # Real time elapsed on the host
-host_tick_rate 194881 # Simulator tick rate (ticks/s)
+host_inst_rate 68704 # Simulator instruction rate (inst/s)
+host_mem_usage 166092 # Number of bytes of host memory used
+host_seconds 0.08 # Real time elapsed on the host
+host_tick_rate 103651 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 5657 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
-sim_ticks 8573 # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses 1131 # number of ReadReq accesses(hits+misses)
+sim_ticks 8579 # Number of ticks simulated
+system.cpu.dcache.ReadReq_accesses 1130 # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency 3 # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 1052 # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency 237 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.069850 # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses 79 # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency 158 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.069850 # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses 79 # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses 933 # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 2.586207 # average WriteReq miss latency
+system.cpu.dcache.ReadReq_hits 1048 # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency 246 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate 0.072566 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses 82 # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency 164 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.072566 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses 82 # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses 924 # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 3 # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2 # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits 875 # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits 874 # number of WriteReq hits
 system.cpu.dcache.WriteReq_miss_latency 150 # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate 0.062165 # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses 58 # number of WriteReq misses
+system.cpu.dcache.WriteReq_miss_rate 0.054113 # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses 50 # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_miss_latency 100 # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate 0.053591 # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate 0.054113 # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses 50 # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 14.065693 # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs 14.560606 # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
 system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 2064 # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 2.824818 # average overall miss latency
+system.cpu.dcache.demand_accesses 2054 # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 3 # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency 2 # average overall mshr miss latency
-system.cpu.dcache.demand_hits 1927 # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency 387 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.066376 # miss rate for demand accesses
-system.cpu.dcache.demand_misses 137 # number of demand (read+write) misses
+system.cpu.dcache.demand_hits 1922 # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency 396 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate 0.064265 # miss rate for demand accesses
+system.cpu.dcache.demand_misses 132 # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 258 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.062500 # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses 129 # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency 264 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate 0.064265 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses 132 # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes 0 # number of fast writes performed
 system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 2064 # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 2.824818 # average overall miss latency
+system.cpu.dcache.overall_accesses 2054 # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 3 # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency 2 # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 1927 # number of overall hits
-system.cpu.dcache.overall_miss_latency 387 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.066376 # miss rate for overall accesses
-system.cpu.dcache.overall_misses 137 # number of overall misses
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency
+system.cpu.dcache.overall_hits 1922 # number of overall hits
+system.cpu.dcache.overall_miss_latency 396 # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate 0.064265 # miss rate for overall accesses
+system.cpu.dcache.overall_misses 132 # number of overall misses
 system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 258 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.062500 # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses 129 # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency 264 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate 0.064265 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses 132 # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
@@ -74,10 +74,10 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0
 system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.dcache.replacements 0 # number of replacements
-system.cpu.dcache.sampled_refs 137 # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs 132 # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse 91.822487 # Cycle average of tags in use
-system.cpu.dcache.total_refs 1927 # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse 86.924009 # Cycle average of tags in use
+system.cpu.dcache.total_refs 1922 # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks 0 # number of writebacks
 system.cpu.icache.ReadReq_accesses 5658 # number of ReadReq accesses(hits+misses)
@@ -115,7 +115,7 @@ system.cpu.icache.no_allocate_misses 0 # Nu
 system.cpu.icache.overall_accesses 5658 # number of overall (read+write) accesses
 system.cpu.icache.overall_avg_miss_latency 2.993399 # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency 1.993399 # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.icache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits 5355 # number of overall hits
 system.cpu.icache.overall_miss_latency 907 # number of overall miss cycles
 system.cpu.icache.overall_miss_rate 0.053552 # miss rate for overall accesses
@@ -138,55 +138,55 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0
 system.cpu.icache.replacements 13 # number of replacements
 system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse 138.188010 # Cycle average of tags in use
+system.cpu.icache.tagsinuse 138.192774 # Cycle average of tags in use
 system.cpu.icache.total_refs 5355 # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks 0 # number of writebacks
 system.cpu.idle_fraction 0 # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses 440 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 1.963470 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_accesses 435 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 2 # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1 # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency 860 # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate 0.995455 # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses 438 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency 430 # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate 0.977273 # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses 430 # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_miss_latency 866 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate 0.995402 # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses 433 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 433 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate 0.995402 # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses 433 # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs 0.004566 # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs 0.004619 # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
 system.cpu.l2cache.cache_copies 0 # number of cache copies performed
-system.cpu.l2cache.demand_accesses 440 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 1.963470 # average overall miss latency
+system.cpu.l2cache.demand_accesses 435 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 2 # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency 1 # average overall mshr miss latency
 system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 860 # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate 0.995455 # miss rate for demand accesses
-system.cpu.l2cache.demand_misses 438 # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency 866 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate 0.995402 # miss rate for demand accesses
+system.cpu.l2cache.demand_misses 433 # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency 430 # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate 0.977273 # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses 430 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency 433 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate 0.995402 # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses 433 # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes 0 # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses 440 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 1.963470 # average overall miss latency
+system.cpu.l2cache.overall_accesses 435 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 2 # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency 1 # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
+system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits 2 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 860 # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate 0.995455 # miss rate for overall accesses
-system.cpu.l2cache.overall_misses 438 # number of overall misses
+system.cpu.l2cache.overall_miss_latency 866 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate 0.995402 # miss rate for overall accesses
+system.cpu.l2cache.overall_misses 433 # number of overall misses
 system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency 430 # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate 0.977273 # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses 430 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency 433 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate 0.995402 # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses 433 # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
@@ -199,9 +199,9 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements 0 # number of replacements
-system.cpu.l2cache.sampled_refs 438 # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs 433 # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse 231.300093 # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse 226.406294 # Cycle average of tags in use
 system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks 0 # number of writebacks
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout b/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout
index 11009935d..4acd2a2e5 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved

-M5 compiled Sep 5 2006 15:37:09
-M5 started Tue Sep 5 15:46:32 2006
+M5 compiled Oct 9 2006 19:28:25
+M5 started Mon Oct 9 19:28:56 2006
 M5 executing on zizzer.eecs.umich.edu
-command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing tests/run.py quick/00.hello/mips/linux/simple-timing
-Exiting @ tick 8573 because target called exit()
+command line: build/MIPS_SE/m5.debug -d build/MIPS_SE/tests/debug/quick/00.hello/mips/linux/simple-timing tests/run.py quick/00.hello/mips/linux/simple-timing
+Exiting @ tick 8579 because target called exit()
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini
index 082415a7f..21028fa63 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
 type=AtomicSimpleCPU
 children=workload
 clock=1
+cpu_id=0
 defer_registration=false
 function_trace=false
 function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 mem=system.physmem
+progress_interval=0
 simulate_stalls=false
 system=system
 width=1
@@ -74,11 +76,17 @@ icache_port=system.membus.port[1]
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+egid=100
 env=
+euid=100
 executable=tests/test-progs/hello/bin/sparc/linux/hello
+gid=100
 input=cin
 output=cout
+pid=100
+ppid=99
 system=system
+uid=100

 [system.membus]
 type=Bus
@@ -94,6 +102,7 @@ port=system.membus.port[0]
 [trace]
 bufsize=0
+cycle=0
 dump_on_exit=false
 file=cout
 flags=
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out
index 45412a511..f5be4e3bd 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out
@@ -28,6 +28,12 @@ input=cin
 output=cout
 env=
 system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99

 [system.cpu]
 type=AtomicSimpleCPU
@@ -35,8 +41,10 @@ max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
+progress_interval=0
 mem=system.physmem
 system=system
+cpu_id=0
 workload=system.cpu.workload
 clock=1
 defer_registration=false
@@ -48,6 +56,7 @@ simulate_stalls=false
 [trace]
 flags=
 start=0
+cycle=0
 bufsize=0
 file=cout
 dump_on_exit=false
@@ -91,3 +100,6 @@ trace_system=client
 [debug]
 break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt
index 9bfb2fec9..e87e77b8f 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate 61348 # Simulator instruction rate (inst/s)
-host_mem_usage 147288 # Number of bytes of host memory used
-host_seconds 0.07 # Real time elapsed on the host
-host_tick_rate 60991 # Simulator tick rate (ticks/s)
+host_inst_rate 2175 # Simulator instruction rate (inst/s)
+host_mem_usage 147292 # Number of bytes of host memory used
+host_seconds 2.06 # Real time elapsed on the host
+host_tick_rate 2174 # Simulator tick rate (ticks/s)
 sim_freq 1000000000000 # Frequency of simulated ticks
 sim_insts 4483 # Number of instructions simulated
 sim_seconds 0.000000 # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout
index 38eb82c8b..c9df3a17c 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout
@@ -5,8 +5,8 @@ The Regents of The University of Michigan
 All Rights Reserved

-M5 compiled Sep 5 2006 15:39:50
-M5 started Tue Sep 5 15:49:24 2006
+M5 compiled Oct 8 2006 14:19:59
+M5 started Sun Oct 8 14:20:03 2006
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.opt -d build/SPARC_SE/tests/opt/quick/00.hello/sparc/linux/simple-atomic tests/run.py quick/00.hello/sparc/linux/simple-atomic
 Exiting @ tick 4482 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
new file mode 100644
index 000000000..9dad57e13
--- /dev/null
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -0,0 +1,444 @@
+[root]
+type=Root
+children=system
+checkpoint=
+clock=1000000000000
+max_tick=0
+output_file=cout
+progress_interval=0
+
+[debug]
+break_cycles=
+
+[exetrace]
+intel_format=false
+pc_symbol=true
+print_cpseq=false
+print_cycle=true
+print_data=true
+print_effaddr=true
+print_fetchseq=false
+print_iregs=false
+print_opclass=true
+print_thread=true
+speculative=true
+trace_system=client
+
+[serialize]
+count=10
+cycle=0
+dir=cpt.%012d
+period=0
+
+[stats]
+descriptions=true
+dump_cycle=0
+dump_period=0
+dump_reset=false
+ignore_events=
+mysql_db=
+mysql_host=
+mysql_password=
+mysql_user=
+project_name=test
+simulation_name=test
+simulation_sample=0
+text_compat=true
+text_file=m5stats.txt
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=DerivO3CPU
+children=dcache fuPool icache l2cache toL2Bus workload0 workload1
+BTBEntries=4096
+BTBTagSize=16
+LFSTSize=1024
+LQEntries=32
+RASSize=16
+SQEntries=32
+SSITSize=1024
+activity=0
+backComSize=5
+choiceCtrBits=2
+choicePredictorSize=8192
+clock=1
+commitToDecodeDelay=1
+commitToFetchDelay=1
+commitToIEWDelay=1
+commitToRenameDelay=1
+commitWidth=8
+decodeToFetchDelay=1
+decodeToRenameDelay=1
+decodeWidth=8
+defer_registration=false
+dispatchWidth=8
+fetchToDecodeDelay=1
+fetchTrapLatency=1
+fetchWidth=8
+forwardComSize=5
+fuPool=system.cpu.fuPool
+function_trace=false
+function_trace_start=0
+globalCtrBits=2
+globalHistoryBits=13
+globalPredictorSize=8192
+iewToCommitDelay=1
+iewToDecodeDelay=1
+iewToFetchDelay=1
+iewToRenameDelay=1
+instShiftAmt=2
+issueToExecuteDelay=1
+issueWidth=8
+localCtrBits=2
+localHistoryBits=11
+localHistoryTableSize=2048
+localPredictorSize=2048
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+mem=system.cpu.dcache
+numIQEntries=64
+numPhysFloatRegs=256
+numPhysIntRegs=256
+numROBEntries=192
+numRobs=1
+numThreads=1
+predType=tournament
+progress_interval=0
+renameToDecodeDelay=1
+renameToFetchDelay=1
+renameToIEWDelay=2
+renameToROBDelay=1
+renameWidth=8
+squashWidth=8
+system=system
+trapLatency=13
+wbDepth=1
+wbWidth=8
+workload=system.cpu.workload0 system.cpu.workload1
+dcache_port=system.cpu.dcache.cpu_side
+icache_port=system.cpu.icache.cpu_side
+
[~185 added lines elided: the [system.cpu.dcache], [system.cpu.fuPool] (FUList0-FUList7 and their OpDesc children), [system.cpu.icache], [system.cpu.l2cache], and [system.cpu.toL2Bus] sections. The three caches repeat the BaseCache parameter block used by the other reference configs in this diff (dcache size=262144, icache size=131072, l2cache size=2097152; assoc=2, block_size=64, mshrs=10, tgts_per_mshr=5, write_buffers=8, hit_latency=1). The FU pool defines 6 IntAlu units, 2 IntMult(3)/IntDiv(20) units, 4 FloatAdd/FloatCmp/FloatCvt units, 2 FloatMult/FloatDiv/FloatSqrt units, 4 combined MemRead/MemWrite units (plus two zero-count read-only and write-only lists), and 1 IprAccess unit; toL2Bus is a Bus with bus_id=0, clock=1000, width=64.]
+
+[system.cpu.workload0]
+type=LiveProcess
+cmd=hello
+egid=100
+env=
+euid=100
+executable=tests/test-progs/hello/bin/alpha/linux/hello +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.cpu.workload1] +type=LiveProcess +cmd=hello +egid=100 +env= +euid=100 +executable=tests/test-progs/hello/bin/alpha/linux/hello +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +port=system.physmem.port system.cpu.l2cache.mem_side + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +port=system.membus.port[0] + +[trace] +bufsize=0 +cycle=0 +dump_on_exit=false +file=cout +flags= +ignore= +start=0 + diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out new file mode 100644 index 000000000..bb55a2b69 --- /dev/null +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out @@ -0,0 +1,433 @@ +[root] +type=Root +clock=1000000000000 +max_tick=0 +progress_interval=0 +output_file=cout + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 + +[system.cpu.workload0] +type=LiveProcess +cmd=hello +executable=tests/test-progs/hello/bin/alpha/linux/hello +input=cin +output=cout +env= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu.workload1] +type=LiveProcess +cmd=hello +executable=tests/test-progs/hello/bin/alpha/linux/hello +input=cin +output=cout +env= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +do_copy=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +opClass=IntAlu +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList0] +type=FUDesc +opList=system.cpu.fuPool.FUList0.opList0 +count=6 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +opClass=IntMult +opLat=3 +issueLat=1 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +opClass=IntDiv +opLat=20 +issueLat=19 + +[system.cpu.fuPool.FUList1] +type=FUDesc +opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 +count=2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +opClass=FloatAdd +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +opClass=FloatCmp +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +opClass=FloatCvt +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2] +type=FUDesc +opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 +count=4 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +opClass=FloatMult +opLat=4 +issueLat=1 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +opClass=FloatDiv +opLat=12 +issueLat=12 + 
+[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +opClass=FloatSqrt +opLat=24 +issueLat=24 + +[system.cpu.fuPool.FUList3] +type=FUDesc +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 +count=2 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList4] +type=FUDesc +opList=system.cpu.fuPool.FUList4.opList0 +count=0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +opList=system.cpu.fuPool.FUList5.opList0 +count=0 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 +count=4 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +opClass=IprAccess +opLat=3 +issueLat=3 + +[system.cpu.fuPool.FUList7] +type=FUDesc +opList=system.cpu.fuPool.FUList7.opList0 +count=1 + +[system.cpu.fuPool] +type=FUPool +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu] +type=DerivO3CPU +clock=1 +numThreads=1 +activity=0 +workload=system.cpu.workload0 system.cpu.workload1 +mem=system.cpu.dcache +checker=null +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +cachePorts=200 +decodeToFetchDelay=1 +renameToFetchDelay=1 +iewToFetchDelay=1 +commitToFetchDelay=1 +fetchWidth=8 +renameToDecodeDelay=1 +iewToDecodeDelay=1 +commitToDecodeDelay=1 +fetchToDecodeDelay=1 +decodeWidth=8 +iewToRenameDelay=1 +commitToRenameDelay=1 +decodeToRenameDelay=1 +renameWidth=8 +commitToIEWDelay=1 +renameToIEWDelay=2 +issueToExecuteDelay=1 +dispatchWidth=8 +issueWidth=8 +wbWidth=8 +wbDepth=1 +fuPool=system.cpu.fuPool +iewToCommitDelay=1 +renameToROBDelay=1 +commitWidth=8 +squashWidth=8 +trapLatency=13 +backComSize=5 +forwardComSize=5 +predType=tournament +localPredictorSize=2048 +localCtrBits=2 +localHistoryTableSize=2048 +localHistoryBits=11 +globalPredictorSize=8192 +globalCtrBits=2 +globalHistoryBits=13 +choicePredictorSize=8192 +choiceCtrBits=2 +BTBEntries=4096 +BTBTagSize=16 +RASSize=16 +LQEntries=32 +SQEntries=32 +LFSTSize=1024 +SSITSize=1024 +numPhysIntRegs=256 +numPhysFloatRegs=256 +numIQEntries=64 +numROBEntries=192 +smtNumFetchingThreads=1 +smtFetchPolicy=SingleThread +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtROBPolicy=Partitioned +smtROBThreshold=100 +smtCommitPolicy=RoundRobin +instShiftAmt=2 +defer_registration=false +function_trace=false +function_trace_start=0 + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +do_copy=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none 
+prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +do_copy=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 + +[trace] +flags= +start=0 +cycle=0 +bufsize=0 +file=cout +dump_on_exit=false +ignore= + +[stats] +descriptions=true +project_name=test +simulation_name=test +simulation_sample=0 +text_file=m5stats.txt +text_compat=true +mysql_db= +mysql_user= +mysql_password= +mysql_host= +events_start=-1 +dump_reset=false +dump_cycle=0 +dump_period=0 +ignore_events= + +[random] +seed=1 + +[exetrace] +speculative=true +print_cycle=true +print_opclass=true +print_thread=true +print_effaddr=true +print_data=true +print_iregs=false +print_fetchseq=false +print_cpseq=false +print_reg_delta=false +pc_symbol=true +intel_format=false +trace_system=client + +[debug] +break_cycles= + +[statsreset] +reset_cycle=0 + diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt new file mode 100644 index 000000000..e5fad9159 --- /dev/null +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt @@ -0,0 +1,722 @@ + +---------- Begin Simulation Statistics ---------- +global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. +global.BPredUnit.BTBHits 640 # Number of BTB hits +global.BPredUnit.BTBLookups 3595 # Number of BTB lookups +global.BPredUnit.RASInCorrect 99 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 1081 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 2447 # Number of conditional branches predicted +global.BPredUnit.lookups 4169 # Number of BP lookups +global.BPredUnit.usedRAS 550 # Number of times the RAS was used to get a target. +host_inst_rate 8624 # Simulator instruction rate (inst/s) +host_mem_usage 167824 # Number of bytes of host memory used +host_seconds 1.30 # Real time elapsed on the host +host_tick_rate 6469 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 41 # Number of conflicting loads. +memdepunit.memDep.conflictingLoads 39 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 194 # Number of conflicting stores. +memdepunit.memDep.conflictingStores 198 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 1868 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 1833 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1106 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1108 # Number of stores inserted to the mem dependence unit. 
+sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 11247 # Number of instructions simulated +sim_seconds 0.000000 # Number of seconds simulated +sim_ticks 8439 # Number of ticks simulated +system.cpu.commit.COM:branches 1724 # Number of branches committed +system.cpu.commit.COM:branches_0 862 # Number of branches committed +system.cpu.commit.COM:branches_1 862 # Number of branches committed +system.cpu.commit.COM:bw_lim_events 126 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits +system.cpu.commit.COM:bw_limited_0 0 # number of insts not committed due to BW limits +system.cpu.commit.COM:bw_limited_1 0 # number of insts not committed due to BW limits +system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle.samples 8391 +system.cpu.commit.COM:committed_per_cycle.min_value 0 + 0 3954 4712.19% + 1 1909 2275.06% + 2 920 1096.41% + 3 516 614.94% + 4 376 448.10% + 5 235 280.06% + 6 188 224.05% + 7 167 199.02% + 8 126 150.16% +system.cpu.commit.COM:committed_per_cycle.max_value 8 +system.cpu.commit.COM:committed_per_cycle.end_dist + +system.cpu.commit.COM:count 11281 # Number of instructions committed +system.cpu.commit.COM:count_0 5640 # Number of instructions committed +system.cpu.commit.COM:count_1 5641 # Number of instructions committed +system.cpu.commit.COM:loads 1958 # Number of loads committed +system.cpu.commit.COM:loads_0 979 # Number of loads committed +system.cpu.commit.COM:loads_1 979 # Number of loads committed +system.cpu.commit.COM:membars 0 # Number of memory barriers committed +system.cpu.commit.COM:membars_0 0 # Number of memory barriers committed +system.cpu.commit.COM:membars_1 0 # Number of memory barriers committed +system.cpu.commit.COM:refs 3582 # Number of memory references committed +system.cpu.commit.COM:refs_0 1791 # Number of memory references committed +system.cpu.commit.COM:refs_1 1791 # Number of memory references committed +system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed +system.cpu.commit.COM:swp_count_0 0 # Number of s/w prefetches committed +system.cpu.commit.COM:swp_count_1 0 # Number of s/w prefetches committed +system.cpu.commit.branchMispredicts 832 # The number of times a branch was mispredicted +system.cpu.commit.commitCommittedInsts 11281 # The number of committed instructions +system.cpu.commit.commitNonSpecStalls 34 # The number of times commit has been forced to stall to communicate backwards +system.cpu.commit.commitSquashedInsts 7510 # The number of squashed insts skipped by commit +system.cpu.committedInsts_0 5623 # Number of Instructions Simulated +system.cpu.committedInsts_1 5624 # Number of Instructions Simulated +system.cpu.committedInsts_total 11247 # Number of Instructions Simulated +system.cpu.cpi_0 1.500800 # CPI: Cycles Per Instruction +system.cpu.cpi_1 1.500533 # CPI: Cycles Per Instruction +system.cpu.cpi_total 0.750333 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2911 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_accesses_0 2911 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 3.077253 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_miss_latency_0 3.077253 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2.232323 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 2.232323 # average 
ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2678 # number of ReadReq hits +system.cpu.dcache.ReadReq_hits_0 2678 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 717 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency_0 717 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.080041 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_miss_rate_0 0.080041 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 233 # number of ReadReq misses +system.cpu.dcache.ReadReq_misses_0 233 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 35 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_hits_0 35 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 442 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency_0 442 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.068018 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.068018 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 198 # number of ReadReq MSHR misses +system.cpu.dcache.ReadReq_mshr_misses_0 198 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 1624 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_accesses_0 1624 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 2.762376 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_miss_latency_0 2.762376 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2.062500 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 2.062500 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 1321 # number of WriteReq hits +system.cpu.dcache.WriteReq_hits_0 1321 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 837 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency_0 837 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.186576 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_miss_rate_0 0.186576 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 303 # number of WriteReq misses +system.cpu.dcache.WriteReq_misses_0 303 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 159 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_hits_0 159 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 297 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency_0 297 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.088670 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_miss_rate_0 0.088670 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 144 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_mshr_misses_0 144 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets 1 # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 11.692982 # Average number of references to valid blocks. 
+system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 7 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 7 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 4535 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses_0 4535 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses_1 0 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 2.899254 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency_0 2.899254 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 2.160819 # average overall mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency_0 2.160819 # average overall mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency +system.cpu.dcache.demand_hits 3999 # number of demand (read+write) hits +system.cpu.dcache.demand_hits_0 3999 # number of demand (read+write) hits +system.cpu.dcache.demand_hits_1 0 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 1554 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency_0 1554 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.118192 # miss rate for demand accesses +system.cpu.dcache.demand_miss_rate_0 0.118192 # miss rate for demand accesses +system.cpu.dcache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses +system.cpu.dcache.demand_misses 536 # number of demand (read+write) misses +system.cpu.dcache.demand_misses_0 536 # number of demand (read+write) misses +system.cpu.dcache.demand_misses_1 0 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 194 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits_0 194 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 739 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency_0 739 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.075413 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate_0 0.075413 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 342 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_misses_0 342 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.mshr_cap_events_0 0 # number of times MSHR cap was activated +system.cpu.dcache.mshr_cap_events_1 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # 
Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 4535 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses_0 4535 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses_1 0 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 2.899254 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency_0 2.899254 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 2.160819 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency_0 2.160819 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 3999 # number of overall hits +system.cpu.dcache.overall_hits_0 3999 # number of overall hits +system.cpu.dcache.overall_hits_1 0 # number of overall hits +system.cpu.dcache.overall_miss_latency 1554 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency_0 1554 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency_1 0 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.118192 # miss rate for overall accesses +system.cpu.dcache.overall_miss_rate_0 0.118192 # miss rate for overall accesses +system.cpu.dcache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses +system.cpu.dcache.overall_misses 536 # number of overall misses +system.cpu.dcache.overall_misses_0 536 # number of overall misses +system.cpu.dcache.overall_misses_1 0 # number of overall misses +system.cpu.dcache.overall_mshr_hits 194 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits_0 194 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits_1 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 739 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency_0 739 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.075413 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate_0 0.075413 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 342 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_misses_0 342 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_misses_1 0 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_latency_1 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.overall_mshr_uncacheable_misses_0 0 # number of overall MSHR uncacheable misses 
+system.cpu.dcache.overall_mshr_uncacheable_misses_1 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 0 # number of replacements +system.cpu.dcache.replacements_0 0 # number of replacements +system.cpu.dcache.replacements_1 0 # number of replacements +system.cpu.dcache.sampled_refs 342 # Sample count of references to valid blocks. +system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 226.387441 # Cycle average of tags in use +system.cpu.dcache.total_refs 3999 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
+system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.dcache.writebacks_0 0 # number of writebacks +system.cpu.dcache.writebacks_1 0 # number of writebacks +system.cpu.decode.DECODE:BlockedCycles 1691 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 271 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 367 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 22675 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 9659 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 3750 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 1395 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 233 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:UnblockCycles 107 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 4169 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 2866 # Number of cache lines fetched +system.cpu.fetch.Cycles 6955 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 200 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 25228 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 1143 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.493957 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 2866 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 1190 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 2.989100 # Number of inst fetches per cycle +system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist.samples 8440 +system.cpu.fetch.rateDist.min_value 0 + 0 4352 5156.40% + 1 273 323.46% + 2 228 270.14% + 3 247 292.65% + 4 313 370.85% + 5 277 328.20% + 6 294 348.34% + 7 291 344.79% + 8 2165 2565.17% +system.cpu.fetch.rateDist.max_value 8 +system.cpu.fetch.rateDist.end_dist + +system.cpu.icache.ReadReq_accesses 2866 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_accesses_0 2866 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 2.982343 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_miss_latency_0 2.982343 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 1.995153 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 1.995153 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 2243 # number of ReadReq hits +system.cpu.icache.ReadReq_hits_0 2243 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1858 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency_0 1858 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.217376 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_miss_rate_0 0.217376 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 623 # number of ReadReq misses +system.cpu.icache.ReadReq_misses_0 623 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 4 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_hits_0 4 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 1235 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency_0 1235 # number of 
ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.215980 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_miss_rate_0 0.215980 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 619 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_mshr_misses_0 619 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_refs 3.623586 # Average number of references to valid blocks. +system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 2866 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses_0 2866 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses_1 0 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 2.982343 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency_0 2.982343 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 1.995153 # average overall mshr miss latency +system.cpu.icache.demand_avg_mshr_miss_latency_0 1.995153 # average overall mshr miss latency +system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency +system.cpu.icache.demand_hits 2243 # number of demand (read+write) hits +system.cpu.icache.demand_hits_0 2243 # number of demand (read+write) hits +system.cpu.icache.demand_hits_1 0 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1858 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency_0 1858 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.217376 # miss rate for demand accesses +system.cpu.icache.demand_miss_rate_0 0.217376 # miss rate for demand accesses +system.cpu.icache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses +system.cpu.icache.demand_misses 623 # number of demand (read+write) misses +system.cpu.icache.demand_misses_0 623 # number of demand (read+write) misses +system.cpu.icache.demand_misses_1 0 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 4 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits_0 4 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 1235 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency_0 1235 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.215980 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_rate_0 0.215980 # mshr miss rate for demand accesses 
+system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 619 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses_0 619 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.mshr_cap_events_0 0 # number of times MSHR cap was activated +system.cpu.icache.mshr_cap_events_1 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 2866 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses_0 2866 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses_1 0 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 2.982343 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency_0 2.982343 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 1.995153 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_miss_latency_0 1.995153 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 2243 # number of overall hits +system.cpu.icache.overall_hits_0 2243 # number of overall hits +system.cpu.icache.overall_hits_1 0 # number of overall hits +system.cpu.icache.overall_miss_latency 1858 # number of overall miss cycles +system.cpu.icache.overall_miss_latency_0 1858 # number of overall miss cycles +system.cpu.icache.overall_miss_latency_1 0 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.217376 # miss rate for overall accesses +system.cpu.icache.overall_miss_rate_0 0.217376 # miss rate for overall accesses +system.cpu.icache.overall_miss_rate_1 no value # miss rate for overall accesses +system.cpu.icache.overall_misses 623 # number of overall misses +system.cpu.icache.overall_misses_0 623 # number of overall misses +system.cpu.icache.overall_misses_1 0 # number of overall misses +system.cpu.icache.overall_mshr_hits 4 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits_0 4 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits_1 0 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 1235 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency_0 1235 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.215980 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate_0 0.215980 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 619 # number of overall MSHR 
misses +system.cpu.icache.overall_mshr_misses_0 619 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses_1 0 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_latency_1 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.overall_mshr_uncacheable_misses_0 0 # number of overall MSHR uncacheable misses +system.cpu.icache.overall_mshr_uncacheable_misses_1 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 9 # number of replacements +system.cpu.icache.replacements_0 9 # number of replacements +system.cpu.icache.replacements_1 0 # number of replacements +system.cpu.icache.sampled_refs 619 # Sample count of references to valid blocks. +system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 332.363626 # Cycle average of tags in use +system.cpu.icache.total_refs 2243 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
+system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.icache.writebacks_0 0 # number of writebacks +system.cpu.icache.writebacks_1 0 # number of writebacks +system.cpu.iew.EXEC:branches 2318 # Number of branches executed +system.cpu.iew.EXEC:branches_0 1160 # Number of branches executed +system.cpu.iew.EXEC:branches_1 1158 # Number of branches executed +system.cpu.iew.EXEC:nop 65 # number of nop insts executed +system.cpu.iew.EXEC:nop_0 31 # number of nop insts executed +system.cpu.iew.EXEC:nop_1 34 # number of nop insts executed +system.cpu.iew.EXEC:rate 1.813863 # Inst execution rate +system.cpu.iew.EXEC:refs 4922 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_0 2464 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_1 2458 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 1868 # Number of stores executed +system.cpu.iew.EXEC:stores_0 932 # Number of stores executed +system.cpu.iew.EXEC:stores_1 936 # Number of stores executed +system.cpu.iew.EXEC:swp 0 # number of swp insts executed +system.cpu.iew.EXEC:swp_0 0 # number of swp insts executed +system.cpu.iew.EXEC:swp_1 0 # number of swp insts executed +system.cpu.iew.WB:consumers 10001 # num instructions consuming a value +system.cpu.iew.WB:consumers_0 5003 # num instructions consuming a value +system.cpu.iew.WB:consumers_1 4998 # num instructions consuming a value +system.cpu.iew.WB:count 14799 # cumulative count of insts written-back +system.cpu.iew.WB:count_0 7402 # cumulative count of insts written-back +system.cpu.iew.WB:count_1 7397 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.777122 # average fanout of values written-back +system.cpu.iew.WB:fanout_0 0.776134 # average fanout of values written-back +system.cpu.iew.WB:fanout_1 0.778111 # average fanout of values written-back +system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ +system.cpu.iew.WB:penalized_0 0 # number of instrctions required to write to 'other' IQ +system.cpu.iew.WB:penalized_1 0 # number of instrctions required to write to 'other' IQ +system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu.iew.WB:penalized_rate_0 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu.iew.WB:penalized_rate_1 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu.iew.WB:producers 7772 # num instructions producing a value +system.cpu.iew.WB:producers_0 3883 # num instructions producing a value +system.cpu.iew.WB:producers_1 3889 # num instructions producing a value +system.cpu.iew.WB:rate 1.753436 # insts written-back per cycle +system.cpu.iew.WB:rate_0 0.877014 # insts written-back per cycle +system.cpu.iew.WB:rate_1 0.876422 # insts written-back per cycle +system.cpu.iew.WB:sent 14932 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_0 7467 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_1 7465 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 926 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 4 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 3701 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 40 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 604 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 2214 # Number of dispatched 
store instructions +system.cpu.iew.iewDispatchedInsts 18792 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 3054 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_0 1532 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_1 1522 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 916 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 15309 # Number of executed instructions +system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall +system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle +system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall +system.cpu.iew.iewSquashCycles 1395 # Number of cycles IEW is squashing +system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking +system.cpu.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.0.cacheBlocked 4 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.0.forwLoads 45 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 2 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address +system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu.iew.lsq.thread.0.memOrderViolation 32 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled +system.cpu.iew.lsq.thread.0.squashedLoads 889 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 294 # Number of stores squashed +system.cpu.iew.lsq.thread.1.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.1.cacheBlocked 6 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.1.forwLoads 45 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.1.ignoredResponses 2 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.1.invAddrLoads 0 # Number of loads ignored due to an invalid address +system.cpu.iew.lsq.thread.1.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu.iew.lsq.thread.1.memOrderViolation 35 # Number of memory ordering violations +system.cpu.iew.lsq.thread.1.rescheduledLoads 1 # Number of loads that were rescheduled +system.cpu.iew.lsq.thread.1.squashedLoads 854 # Number of loads squashed +system.cpu.iew.lsq.thread.1.squashedStores 296 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 67 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 764 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 162 # Number of branches that were predicted taken incorrectly +system.cpu.ipc_0 0.666311 # IPC: Instructions Per Cycle +system.cpu.ipc_1 0.666430 # IPC: Instructions Per Cycle +system.cpu.ipc_total 1.332741 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 8135 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.start_dist + (null) 2 0.02% # Type of FU issued + IntAlu 5505 67.67% # Type of FU issued + IntMult 1 0.01% # Type of FU issued + IntDiv 0 0.00% # 
Type of FU issued + FloatAdd 2 0.02% # Type of FU issued + FloatCmp 0 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 0 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 1656 20.36% # Type of FU issued + MemWrite 969 11.91% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.end_dist +system.cpu.iq.ISSUE:FU_type_1 8090 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_1.start_dist + (null) 2 0.02% # Type of FU issued + IntAlu 5481 67.75% # Type of FU issued + IntMult 1 0.01% # Type of FU issued + IntDiv 0 0.00% # Type of FU issued + FloatAdd 2 0.02% # Type of FU issued + FloatCmp 0 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 0 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 1640 20.27% # Type of FU issued + MemWrite 964 11.92% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_1.end_dist +system.cpu.iq.ISSUE:FU_type 16225 # Type of FU issued +system.cpu.iq.ISSUE:FU_type.start_dist + (null) 4 0.02% # Type of FU issued + IntAlu 10986 67.71% # Type of FU issued + IntMult 2 0.01% # Type of FU issued + IntDiv 0 0.00% # Type of FU issued + FloatAdd 4 0.02% # Type of FU issued + FloatCmp 0 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 0 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 3296 20.31% # Type of FU issued + MemWrite 1933 11.91% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type.end_dist +system.cpu.iq.ISSUE:fu_busy_cnt 181 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_0 103 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_1 78 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.011156 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_0 0.006348 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_1 0.004807 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_full.start_dist + (null) 0 0.00% # attempts to use FU when none available + IntAlu 10 5.52% # attempts to use FU when none available + IntMult 0 0.00% # attempts to use FU when none available + IntDiv 0 0.00% # attempts to use FU when none available + FloatAdd 0 0.00% # attempts to use FU when none available + FloatCmp 0 0.00% # attempts to use FU when none available + FloatCvt 0 0.00% # attempts to use FU when none available + FloatMult 0 0.00% # attempts to use FU when none available + FloatDiv 0 0.00% # attempts to use FU when none available + FloatSqrt 0 0.00% # attempts to use FU when none available + MemRead 100 55.25% # attempts to use FU when none available + MemWrite 71 39.23% # attempts to use FU when none available + IprAccess 0 0.00% # attempts to use FU when none available + InstPrefetch 0 0.00% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full.end_dist +system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle.samples 8440 +system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 + 0 2689 3186.02% + 1 1457 1726.30% + 2 1432 1696.68% + 3 1110 1315.17% + 4 757 896.92% + 5 583 690.76% + 6 287 340.05% + 7 91 107.82% + 8 34 40.28% 
+system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 +system.cpu.iq.ISSUE:issued_per_cycle.end_dist + +system.cpu.iq.ISSUE:rate 1.922393 # Inst issue rate +system.cpu.iq.iqInstsAdded 18687 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 16225 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 40 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 6645 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 31 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 6 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 4127 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadReq_accesses 961 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_accesses_0 961 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 2.059623 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency_0 2.059623 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 1 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 5 # number of ReadReq hits +system.cpu.l2cache.ReadReq_hits_0 5 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 1969 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency_0 1969 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.994797 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_miss_rate_0 0.994797 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 956 # number of ReadReq misses +system.cpu.l2cache.ReadReq_misses_0 956 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 956 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency_0 956 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994797 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_miss_rate_0 0.994797 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 956 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses_0 956 # number of ReadReq MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 0.005230 # Average number of references to valid blocks. 
+system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 961 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses_0 961 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses_1 0 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 2.059623 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency_0 2.059623 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 1 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency_0 1 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency +system.cpu.l2cache.demand_hits 5 # number of demand (read+write) hits +system.cpu.l2cache.demand_hits_0 5 # number of demand (read+write) hits +system.cpu.l2cache.demand_hits_1 0 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 1969 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency_0 1969 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.994797 # miss rate for demand accesses +system.cpu.l2cache.demand_miss_rate_0 0.994797 # miss rate for demand accesses +system.cpu.l2cache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses +system.cpu.l2cache.demand_misses 956 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses_0 956 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses_1 0 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_hits_0 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 956 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency_0 956 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.994797 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_rate_0 0.994797 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 956 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses_0 956 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.mshr_cap_events_0 0 # number of times MSHR cap was activated +system.cpu.l2cache.mshr_cap_events_1 0 # number of times MSHR cap was activated 
+system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 961 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses_0 961 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses_1 0 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 2.059623 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency_0 2.059623 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 1 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency_0 1 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 5 # number of overall hits +system.cpu.l2cache.overall_hits_0 5 # number of overall hits +system.cpu.l2cache.overall_hits_1 0 # number of overall hits +system.cpu.l2cache.overall_miss_latency 1969 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency_0 1969 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency_1 0 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.994797 # miss rate for overall accesses +system.cpu.l2cache.overall_miss_rate_0 0.994797 # miss rate for overall accesses +system.cpu.l2cache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses +system.cpu.l2cache.overall_misses 956 # number of overall misses +system.cpu.l2cache.overall_misses_0 956 # number of overall misses +system.cpu.l2cache.overall_misses_1 0 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_hits_0 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_hits_1 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 956 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency_0 956 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.994797 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_rate_0 0.994797 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 956 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses_0 956 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses_1 0 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_latency_1 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.overall_mshr_uncacheable_misses_0 0 # number of overall MSHR 
uncacheable misses +system.cpu.l2cache.overall_mshr_uncacheable_misses_1 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.replacements_0 0 # number of replacements +system.cpu.l2cache.replacements_1 0 # number of replacements +system.cpu.l2cache.sampled_refs 956 # Sample count of references to valid blocks. +system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instructions +system.cpu.l2cache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instructions +system.cpu.l2cache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instructions +system.cpu.l2cache.tagsinuse 558.812441 # Cycle average of tags in use +system.cpu.l2cache.total_refs 5 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.writebacks 0 # number of writebacks +system.cpu.l2cache.writebacks_0 0 # number of writebacks +system.cpu.l2cache.writebacks_1 0 # number of writebacks +system.cpu.numCycles 8440 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 345 # Number of cycles rename is blocking +system.cpu.rename.RENAME:CommittedMaps 8102 # Number of HB maps that are committed +system.cpu.rename.RENAME:IdleCycles 9958 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 698 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:RenameLookups 26874 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 21097 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 15772 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 3566 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 1395 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 766 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 7670 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 572 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 48 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 1906 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 38 # count of temporary serializing insts renamed +system.cpu.workload0.PROG:num_syscalls 17 # Number of system calls +system.cpu.workload1.PROG:num_syscalls 17 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr new file mode 100644 index 000000000..48d711163 --- /dev/null +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr @@ -0,0 +1,22 @@ +warn: Entering event queue @ 0. Starting simulation... +warn: cycle 0: fault (page_table_fault) detected @ PC 0x000000 +warn: Increasing stack 0x11ff92000:0x11ff9b000 to 0x11ff90000:0x11ff9b000 because of access to 0x11ff91ff0 +warn: Increasing stack 0x11ff92000:0x11ff9b000 to 0x11ff90000:0x11ff9b000 because of access to 0x11ff91ff0 +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. 
+warn: Default fetch doesn't update it's state from a functional call. +warn: Default fetch doesn't update it's state from a functional call. diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout new file mode 100644 index 000000000..2b27a0049 --- /dev/null +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout @@ -0,0 +1,14 @@ +Hello world! +Hello world! +M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Oct 10 2006 01:56:36 +M5 started Tue Oct 10 01:57:16 2006 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing +Exiting @ tick 8439 because target called exit() diff --git a/tests/quick/01.hello-2T-smt/test.py b/tests/quick/01.hello-2T-smt/test.py new file mode 100644 index 000000000..04ff8c2e6 --- /dev/null +++ b/tests/quick/01.hello-2T-smt/test.py @@ -0,0 +1,32 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Korey Sewell + +process1 = LiveProcess(cmd = 'hello', executable = binpath('hello')) +process2 = LiveProcess(cmd = 'hello', executable = binpath('hello')) + +root.system.cpu.workload = [process1, process2] diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini index c2bcb99aa..c45637b94 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini @@ -58,6 +58,7 @@ mem_mode=atomic pal=/dist/m5/system/binaries/ts_osfpal physmem=system.physmem readfile=tests/halt.sh +symbolfile= system_rev=1024 system_type=34 @@ -74,7 +75,7 @@ side_b=system.membus.port[0] type=AtomicSimpleCPU children=dtb itb clock=1 -cpu_id=-1 +cpu_id=0 defer_registration=false dtb=system.cpu0.dtb function_trace=false @@ -86,6 +87,7 @@ max_loads_all_threads=0 max_loads_any_thread=0 mem=system.physmem profile=0 +progress_interval=0 simulate_stalls=false system=system width=1 @@ -104,7 +106,7 @@ size=48 type=AtomicSimpleCPU children=dtb itb clock=1 -cpu_id=-1 +cpu_id=1 defer_registration=false dtb=system.cpu1.dtb function_trace=false @@ -116,6 +118,7 @@ max_loads_all_threads=0 max_loads_any_thread=0 mem=system.physmem profile=0 +progress_interval=0 simulate_stalls=false system=system width=1 @@ -175,12 +178,16 @@ cpu=system.cpu0 [system.iobus] type=Bus bus_id=0 +clock=2 +width=64 default=system.tsunami.pciconfig.pio port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio system.tsunami.fake_sm_chip.pio system.tsunami.fake_uart1.pio system.tsunami.fake_uart2.pio system.tsunami.fake_uart3.pio system.tsunami.fake_uart4.pio system.tsunami.fake_ppc.pio system.tsunami.fake_OROM.pio system.tsunami.fake_pnp_addr.pio system.tsunami.fake_pnp_write.pio system.tsunami.fake_pnp_read0.pio system.tsunami.fake_pnp_read1.pio system.tsunami.fake_pnp_read2.pio system.tsunami.fake_pnp_read3.pio system.tsunami.fake_pnp_read4.pio system.tsunami.fake_pnp_read5.pio system.tsunami.fake_pnp_read6.pio system.tsunami.fake_pnp_read7.pio system.tsunami.fake_ata0.pio system.tsunami.fake_ata1.pio system.tsunami.fb.pio system.tsunami.io.pio system.tsunami.uart.pio system.tsunami.console.pio system.tsunami.ide.pio system.tsunami.ethernet.pio system.tsunami.ethernet.config system.tsunami.ethernet.dma system.tsunami.ide.config system.tsunami.ide.dma [system.membus] type=Bus bus_id=1 +clock=2 +width=64 port=system.bridge.side_b system.physmem.port system.cpu0.icache_port system.cpu0.dcache_port system.cpu1.icache_port system.cpu1.dcache_port [system.physmem] @@ -584,6 +591,7 @@ pio=system.iobus.port[24] [trace] bufsize=0 +cycle=0 dump_on_exit=false file=cout flags= diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out index 737ee6611..45cbbec9b 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out @@ -21,6 +21,7 @@ console=/dist/m5/system/binaries/console pal=/dist/m5/system/binaries/ts_osfpal boot_osflags=root=/dev/hda1 console=ttyS0 readfile=tests/halt.sh +symbolfile= init_param=0 system_type=34 system_rev=1024 @@ -28,6 +29,8 @@ system_rev=1024 [system.membus] type=Bus bus_id=1 +clock=2 +width=64 [system.bridge] type=Bridge @@ -86,11 +89,12 @@ 
max_insts_any_thread=0 max_insts_all_threads=0 max_loads_any_thread=0 max_loads_all_threads=0 +progress_interval=0 mem=system.physmem system=system +cpu_id=0 itb=system.cpu0.itb dtb=system.cpu0.dtb -cpu_id=-1 profile=0 clock=1 defer_registration=false @@ -113,11 +117,12 @@ max_insts_any_thread=0 max_insts_all_threads=0 max_loads_any_thread=0 max_loads_all_threads=0 +progress_interval=0 mem=system.physmem system=system +cpu_id=1 itb=system.cpu1.itb dtb=system.cpu1.dtb -cpu_id=-1 profile=0 clock=1 defer_registration=false @@ -488,10 +493,13 @@ disks=system.disk0 system.disk2 [system.iobus] type=Bus bus_id=0 +clock=2 +width=64 [trace] flags= start=0 +cycle=0 bufsize=0 file=cout dump_on_exit=false @@ -535,6 +543,9 @@ trace_system=client [debug] break_cycles= +[statsreset] +reset_cycle=0 + [pseudo_inst] quiesce=true statistics=true diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/console.system.sim_console b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/console.system.sim_console index c3c7b2676..27adebb82 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/console.system.sim_console +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/console.system.sim_console @@ -3,7 +3,7 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
memsize 8000000 pages 4000
First free page after ROM 0xFFFFFC0000018000
HWRPB 0xFFFFFC0000018000 l1pt 0xFFFFFC0000040000 l2pt 0xFFFFFC0000042000 l3pt_rpb 0xFFFFFC0000044000 l3pt_kernel 0xFFFFFC0000048000 l2reserv 0xFFFFFC0000046000 -
kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC00008064E8, kentry = 0xFFFFFC0000310000, numCPUs = 0x2 +
kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC0000855898, kentry = 0xFFFFFC0000310000, numCPUs = 0x2
CPU Clock at 2000 MHz IntrClockFrequency=1024
Booting with 2 processor(s)
KSP: 0x20043FE8 PTBR 0x20 @@ -16,29 +16,27 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
Bootstrapping CPU 1 with sp=0xFFFFFC0000076000
unix_boot_mem ends at FFFFFC0000078000
k_argc = 0 -
jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1028) -
CallbackFixup 0 18000, t7=FFFFFC0000700000 +
jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1067) +
CallbackFixup 0 18000, t7=FFFFFC000070C000
Entering slaveloop for cpu 1 my_rpb=FFFFFC0000018400 -
Linux version 2.6.8.1 (binkertn@ziff.eecs.umich.edu) (gcc version 3.4.3) #36 SMP Mon May 2 19:50:53 EDT 2005 +
Linux version 2.6.13 (hsul@zed.eecs.umich.edu) (gcc version 3.4.3) #1 SMP Sun Oct 8 19:52:07 EDT 2006
Booting GENERIC on Tsunami variation DP264 using machine vector DP264 from SRM
Major Options: SMP LEGACY_START VERBOSE_MCHECK
Command line: root=/dev/hda1 console=ttyS0
memcluster 0, usage 1, start 0, end 392
memcluster 1, usage 0, start 392, end 16384 -
freeing pages 1030:16384 -
reserving pages 1030:1031 +
freeing pages 1069:16384 +
reserving pages 1069:1070
SMP: 2 CPUs probed -- cpu_present_mask = 3
Built 1 zonelists
Kernel command line: root=/dev/hda1 console=ttyS0 -
PID hash table entries: 1024 (order 10: 16384 bytes) +
PID hash table entries: 1024 (order: 10, 32768 bytes)
Using epoch = 1900
Console: colour dummy device 80x25
Dentry cache hash table entries: 32768 (order: 5, 262144 bytes)
Inode-cache hash table entries: 16384 (order: 4, 131072 bytes) -
Memory: 119072k/131072k available (3058k kernel code, 8680k reserved, 695k data, 480k init) -
Mount-cache hash table entries: 512 (order: 0, 8192 bytes) -
per-CPU timeslice cutoff: 374.49 usecs. -
task migration cache decay timeout: 0 msecs. +
Memory: 118784k/131072k available (3314k kernel code, 8952k reserved, 983k data, 224k init) +
Mount-cache hash table entries: 512
SMP starting up secondaries.
Slave CPU 1 console command START SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb FFFFFC0000018400 my_rpb_phys 18400 @@ -53,16 +51,21 @@ SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb
Initializing Cryptographic API
rtc: Standard PC (1900) epoch (1900) detected
Real Time Clock Driver v1.12 -
Serial: 8250/16550 driver $Revision: 1.90 $ 5 ports, IRQ sharing disabled +
Serial: 8250/16550 driver $Revision: 1.90 $ 1 ports, IRQ sharing disabled
ttyS0 at I/O 0x3f8 (irq = 4) is a 8250 +
io scheduler noop registered +
io scheduler anticipatory registered +
io scheduler deadline registered +
io scheduler cfq registered
loop: loaded (max 8 devices) -
Using anticipatory io scheduler
nbd: registered device at major 43 -
sinic.c: M5 Simple Integrated NIC driver
ns83820.c: National Semiconductor DP83820 10/100/1000 driver.
eth0: ns83820.c: 0x22c: 00000000, subsystem: 0000:0000
eth0: enabling optical transceiver -
eth0: ns83820 v0.20: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=sg +
eth0: using 64 bit addressing. +
eth0: ns83820 v0.22: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=h,sg +
tun: Universal TUN/TAP device driver, 1.6 +
tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2
ide: Assuming 33MHz system bus speed for PIO modes; override with idebus=xx
PIIX4: IDE controller at PCI slot 0000:00:00.0 @@ -74,25 +77,24 @@ SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb
hdb: M5 IDE Disk, ATA DISK drive
ide0 at 0x8410-0x8417,0x8422 on irq 31
hda: max request size: 128KiB -
hda: 163296 sectors (83 MB), CHS=162/16/63, UDMA(33) +
hda: 511056 sectors (261 MB), CHS=507/16/63, UDMA(33) +
hda: cache flushes not supported
hda: hda1
hdb: max request size: 128KiB
hdb: 4177920 sectors (2139 MB), CHS=4144/16/63, UDMA(33) +
hdb: cache flushes not supported
hdb: unknown partition table -
scsi0 : scsi_m5, version 1.73 [20040518], dev_size_mb=8, opts=0x0 -
Vendor: Linux Model: scsi_m5 Li Rev: 0004 -
Type: Direct-Access ANSI SCSI revision: 03 -
SCSI device sda: 16384 512-byte hdwr sectors (8 MB) -
SCSI device sda: drive cache: write back -
sda: unknown partition table -
Attached scsi disk sda at scsi0, channel 0, id 0, lun 0
mice: PS/2 mouse device common for all mice
NET: Registered protocol family 2 -
IP: routing cache hash table of 1024 buckets, 16Kbytes -
TCP: Hash tables configured (established 8192 bind 8192) -
ip_conntrack version 2.1 (512 buckets, 4096 max) - 440 bytes per conntrack +
IP route cache hash table entries: 4096 (order: 2, 32768 bytes) +
TCP established hash table entries: 16384 (order: 5, 262144 bytes) +
TCP bind hash table entries: 16384 (order: 5, 262144 bytes) +
TCP: Hash tables configured (established 16384 bind 16384) +
TCP reno registered +
ip_conntrack version 2.1 (512 buckets, 4096 max) - 296 bytes per conntrack
ip_tables: (C) 2000-2002 Netfilter core team
arp_tables: (C) 2002 David S. Miller +
TCP bic registered
Initializing IPsec netlink socket
NET: Registered protocol family 1
NET: Registered protocol family 17 @@ -101,11 +103,7 @@ SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb
802.1Q VLAN Support v1.8 Ben Greear <greearb@candelatech.com>
All bugs added by David S. Miller <davem@redhat.com>
VFS: Mounted root (ext2 filesystem) readonly. -
Freeing unused kernel memory: 480k freed -
init started: BusyBox v1.00-rc2 (2004.11.18-16:22+0000) multi-call binary - -PTXdist-0.7.0 (2004-11-18T11:23:40-0500) - +
Freeing unused kernel memory: 224k freed +
init started: BusyBox v1.1.0 (2006.08.17-02:54+0000) multi-call binary mounting filesystems... -EXT2-fs warning: checktime reached, running e2fsck is recommended -
loading script... +loading script... diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt index c7715aeac..e76c1d683 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt @@ -1,239 +1,224 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 1382023 # Simulator instruction rate (inst/s) -host_mem_usage 194588 # Number of bytes of host memory used -host_seconds 45.78 # Real time elapsed on the host -host_tick_rate 77681401 # Simulator tick rate (ticks/s) +host_inst_rate 1270607 # Simulator instruction rate (inst/s) +host_mem_usage 197696 # Number of bytes of host memory used +host_seconds 51.09 # Real time elapsed on the host +host_tick_rate 72782461 # Simulator tick rate (ticks/s) sim_freq 2000000000 # Frequency of simulated ticks -sim_insts 63264995 # Number of instructions simulated -sim_seconds 1.778030 # Number of seconds simulated -sim_ticks 3556060806 # Number of ticks simulated -system.cpu0.dtb.accesses 1831687 # DTB accesses -system.cpu0.dtb.acv 360 # DTB access violations -system.cpu0.dtb.hits 12876975 # DTB hits -system.cpu0.dtb.misses 11050 # DTB misses -system.cpu0.dtb.read_accesses 495437 # DTB read accesses -system.cpu0.dtb.read_acv 219 # DTB read access violations -system.cpu0.dtb.read_hits 7121424 # DTB read hits -system.cpu0.dtb.read_misses 9036 # DTB read misses -system.cpu0.dtb.write_accesses 1336250 # DTB write accesses -system.cpu0.dtb.write_acv 141 # DTB write access violations -system.cpu0.dtb.write_hits 5755551 # DTB write hits -system.cpu0.dtb.write_misses 2014 # DTB write misses -system.cpu0.idle_fraction 0.984569 # Percentage of idle cycles -system.cpu0.itb.accesses 2328068 # ITB accesses -system.cpu0.itb.acv 216 # ITB acv -system.cpu0.itb.hits 2323500 # ITB hits -system.cpu0.itb.misses 4568 # ITB misses -system.cpu0.kern.callpal 179206 # number of callpals executed +sim_insts 64909600 # Number of instructions simulated +sim_seconds 1.859078 # Number of seconds simulated +sim_ticks 3718155709 # Number of ticks simulated +system.cpu0.dtb.accesses 544556 # DTB accesses +system.cpu0.dtb.acv 335 # DTB access violations +system.cpu0.dtb.hits 14841931 # DTB hits +system.cpu0.dtb.misses 7356 # DTB misses +system.cpu0.dtb.read_accesses 377530 # DTB read accesses +system.cpu0.dtb.read_acv 210 # DTB read access violations +system.cpu0.dtb.read_hits 8970576 # DTB read hits +system.cpu0.dtb.read_misses 6581 # DTB read misses +system.cpu0.dtb.write_accesses 167026 # DTB write accesses +system.cpu0.dtb.write_acv 125 # DTB write access violations +system.cpu0.dtb.write_hits 5871355 # DTB write hits +system.cpu0.dtb.write_misses 775 # DTB write misses +system.cpu0.idle_fraction 0.984943 # Percentage of idle cycles +system.cpu0.itb.accesses 1436270 # ITB accesses +system.cpu0.itb.acv 184 # ITB acv +system.cpu0.itb.hits 1432801 # ITB hits +system.cpu0.itb.misses 3469 # ITB misses +system.cpu0.kern.callpal 182754 # number of callpals executed system.cpu0.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed -system.cpu0.kern.callpal_wripir 91 0.05% 0.05% # number of callpals executed -system.cpu0.kern.callpal_wrmces 1 0.00% 0.05% # number of callpals executed -system.cpu0.kern.callpal_wrfen 1 0.00% 0.05% # number of callpals executed -system.cpu0.kern.callpal_wrvptptr 1 0.00% 0.05% # number of callpals executed 
-system.cpu0.kern.callpal_swpctx 1375 0.77% 0.82% # number of callpals executed -system.cpu0.kern.callpal_tbi 20 0.01% 0.83% # number of callpals executed -system.cpu0.kern.callpal_wrent 7 0.00% 0.84% # number of callpals executed -system.cpu0.kern.callpal_swpipl 168681 94.13% 94.96% # number of callpals executed -system.cpu0.kern.callpal_rdps 4713 2.63% 97.59% # number of callpals executed -system.cpu0.kern.callpal_wrkgp 1 0.00% 97.59% # number of callpals executed -system.cpu0.kern.callpal_wrusp 4 0.00% 97.59% # number of callpals executed -system.cpu0.kern.callpal_rdusp 11 0.01% 97.60% # number of callpals executed -system.cpu0.kern.callpal_whami 2 0.00% 97.60% # number of callpals executed -system.cpu0.kern.callpal_rti 3639 2.03% 99.63% # number of callpals executed -system.cpu0.kern.callpal_callsys 461 0.26% 99.89% # number of callpals executed -system.cpu0.kern.callpal_imb 197 0.11% 100.00% # number of callpals executed +system.cpu0.kern.callpal_wripir 115 0.06% 0.06% # number of callpals executed +system.cpu0.kern.callpal_wrmces 1 0.00% 0.06% # number of callpals executed +system.cpu0.kern.callpal_wrfen 1 0.00% 0.06% # number of callpals executed +system.cpu0.kern.callpal_wrvptptr 1 0.00% 0.07% # number of callpals executed +system.cpu0.kern.callpal_swpctx 3791 2.07% 2.14% # number of callpals executed +system.cpu0.kern.callpal_tbi 49 0.03% 2.17% # number of callpals executed +system.cpu0.kern.callpal_wrent 7 0.00% 2.17% # number of callpals executed +system.cpu0.kern.callpal_swpipl 167832 91.83% 94.01% # number of callpals executed +system.cpu0.kern.callpal_rdps 5780 3.16% 97.17% # number of callpals executed +system.cpu0.kern.callpal_wrkgp 1 0.00% 97.17% # number of callpals executed +system.cpu0.kern.callpal_wrusp 2 0.00% 97.17% # number of callpals executed +system.cpu0.kern.callpal_rdusp 9 0.00% 97.17% # number of callpals executed +system.cpu0.kern.callpal_whami 2 0.00% 97.18% # number of callpals executed +system.cpu0.kern.callpal_rti 4696 2.57% 99.75% # number of callpals executed +system.cpu0.kern.callpal_callsys 344 0.19% 99.93% # number of callpals executed +system.cpu0.kern.callpal_imb 122 0.07% 100.00% # number of callpals executed system.cpu0.kern.inst.arm 0 # number of arm instructions executed -system.cpu0.kern.inst.hwrei 197512 # number of hwrei instructions executed +system.cpu0.kern.inst.hwrei 196249 # number of hwrei instructions executed system.cpu0.kern.inst.ivlb 0 # number of ivlb instructions executed system.cpu0.kern.inst.ivle 0 # number of ivle instructions executed -system.cpu0.kern.inst.quiesce 1917 # number of quiesce instructions executed -system.cpu0.kern.ipl_count 174431 # number of times we switched to this ipl -system.cpu0.kern.ipl_count_0 73383 42.07% 42.07% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_21 286 0.16% 42.23% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_22 5540 3.18% 45.41% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_30 8 0.00% 45.41% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_31 95214 54.59% 100.00% # number of times we switched to this ipl -system.cpu0.kern.ipl_good 156222 # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_0 73336 46.94% 46.94% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_21 286 0.18% 47.13% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_22 5540 3.55% 50.67% # number of times we switched to 
this ipl from a different ipl -system.cpu0.kern.ipl_good_30 8 0.01% 50.68% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_31 77052 49.32% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_ticks 3555570558 # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_0 3533670973 99.38% 99.38% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_21 45785 0.00% 99.39% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_22 1008642 0.03% 99.41% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_30 1988 0.00% 99.41% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_31 20843170 0.59% 100.00% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_used 0.895609 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.ipl_used_0 0.999360 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.inst.quiesce 6184 # number of quiesce instructions executed +system.cpu0.kern.ipl_count 174678 # number of times we switched to this ipl +system.cpu0.kern.ipl_count_0 70736 40.50% 40.50% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_21 245 0.14% 40.64% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_22 1896 1.09% 41.72% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_30 8 0.00% 41.73% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_31 101793 58.27% 100.00% # number of times we switched to this ipl +system.cpu0.kern.ipl_good 140889 # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_0 69374 49.24% 49.24% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_21 245 0.17% 49.41% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_22 1896 1.35% 50.76% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_30 8 0.01% 50.77% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_31 69366 49.23% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_ticks 3718155294 # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_0 3683661066 99.07% 99.07% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_21 40474 0.00% 99.07% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_22 163056 0.00% 99.08% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_30 2026 0.00% 99.08% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_31 34288672 0.92% 100.00% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_used 0.806564 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.ipl_used_0 0.980745 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_21 1 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_30 1 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.ipl_used_31 0.809251 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.mode_good_kernel 1633 -system.cpu0.kern.mode_good_user 1486 -system.cpu0.kern.mode_good_idle 147 -system.cpu0.kern.mode_switch_kernel 2898 # number of protection mode switches 
-system.cpu0.kern.mode_switch_user 1486 # number of protection mode switches -system.cpu0.kern.mode_switch_idle 2090 # number of protection mode switches -system.cpu0.kern.mode_switch_good 0.504479 # fraction of useful protection mode switches -system.cpu0.kern.mode_switch_good_kernel 0.563492 # fraction of useful protection mode switches +system.cpu0.kern.ipl_used_31 0.681442 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.mode_good_kernel 1192 +system.cpu0.kern.mode_good_user 1193 +system.cpu0.kern.mode_good_idle 0 +system.cpu0.kern.mode_switch_kernel 7143 # number of protection mode switches +system.cpu0.kern.mode_switch_user 1193 # number of protection mode switches +system.cpu0.kern.mode_switch_idle 0 # number of protection mode switches +system.cpu0.kern.mode_switch_good 0.286108 # fraction of useful protection mode switches +system.cpu0.kern.mode_switch_good_kernel 0.166877 # fraction of useful protection mode switches system.cpu0.kern.mode_switch_good_user 1 # fraction of useful protection mode switches -system.cpu0.kern.mode_switch_good_idle 0.070335 # fraction of useful protection mode switches -system.cpu0.kern.mode_ticks_kernel 29671488 0.83% 0.83% # number of ticks spent at the given mode -system.cpu0.kern.mode_ticks_user 2605758 0.07% 0.91% # number of ticks spent at the given mode -system.cpu0.kern.mode_ticks_idle 3523245106 99.09% 100.00% # number of ticks spent at the given mode -system.cpu0.kern.swap_context 1376 # number of times the context was actually changed -system.cpu0.kern.syscall 312 # number of syscalls executed -system.cpu0.kern.syscall_fork 9 2.88% 2.88% # number of syscalls executed -system.cpu0.kern.syscall_read 20 6.41% 9.29% # number of syscalls executed -system.cpu0.kern.syscall_write 6 1.92% 11.22% # number of syscalls executed -system.cpu0.kern.syscall_close 36 11.54% 22.76% # number of syscalls executed -system.cpu0.kern.syscall_chdir 1 0.32% 23.08% # number of syscalls executed -system.cpu0.kern.syscall_chmod 1 0.32% 23.40% # number of syscalls executed -system.cpu0.kern.syscall_obreak 26 8.33% 31.73% # number of syscalls executed -system.cpu0.kern.syscall_lseek 9 2.88% 34.62% # number of syscalls executed -system.cpu0.kern.syscall_getpid 8 2.56% 37.18% # number of syscalls executed -system.cpu0.kern.syscall_setuid 2 0.64% 37.82% # number of syscalls executed -system.cpu0.kern.syscall_getuid 4 1.28% 39.10% # number of syscalls executed -system.cpu0.kern.syscall_access 4 1.28% 40.38% # number of syscalls executed -system.cpu0.kern.syscall_dup 4 1.28% 41.67% # number of syscalls executed -system.cpu0.kern.syscall_open 40 12.82% 54.49% # number of syscalls executed -system.cpu0.kern.syscall_getgid 4 1.28% 55.77% # number of syscalls executed -system.cpu0.kern.syscall_sigprocmask 12 3.85% 59.62% # number of syscalls executed -system.cpu0.kern.syscall_ioctl 13 4.17% 63.78% # number of syscalls executed -system.cpu0.kern.syscall_readlink 1 0.32% 64.10% # number of syscalls executed -system.cpu0.kern.syscall_execve 7 2.24% 66.35% # number of syscalls executed -system.cpu0.kern.syscall_pre_F64_stat 22 7.05% 73.40% # number of syscalls executed -system.cpu0.kern.syscall_pre_F64_lstat 1 0.32% 73.72% # number of syscalls executed -system.cpu0.kern.syscall_mmap 28 8.97% 82.69% # number of syscalls executed -system.cpu0.kern.syscall_munmap 4 1.28% 83.97% # number of syscalls executed -system.cpu0.kern.syscall_mprotect 7 2.24% 86.22% # number of syscalls executed -system.cpu0.kern.syscall_gethostname 1 0.32% 86.54% # number of syscalls 
executed -system.cpu0.kern.syscall_dup2 3 0.96% 87.50% # number of syscalls executed -system.cpu0.kern.syscall_pre_F64_fstat 15 4.81% 92.31% # number of syscalls executed -system.cpu0.kern.syscall_fcntl 11 3.53% 95.83% # number of syscalls executed -system.cpu0.kern.syscall_socket 3 0.96% 96.79% # number of syscalls executed -system.cpu0.kern.syscall_connect 3 0.96% 97.76% # number of syscalls executed -system.cpu0.kern.syscall_setgid 2 0.64% 98.40% # number of syscalls executed -system.cpu0.kern.syscall_getrlimit 2 0.64% 99.04% # number of syscalls executed -system.cpu0.kern.syscall_setsid 3 0.96% 100.00% # number of syscalls executed -system.cpu0.not_idle_fraction 0.015431 # Percentage of non-idle cycles -system.cpu0.numCycles 54873632 # number of cpu cycles simulated -system.cpu0.num_insts 54868848 # Number of instructions executed -system.cpu0.num_refs 12918621 # Number of memory references -system.cpu1.dtb.accesses 524398 # DTB accesses -system.cpu1.dtb.acv 60 # DTB access violations -system.cpu1.dtb.hits 2058922 # DTB hits -system.cpu1.dtb.misses 5263 # DTB misses -system.cpu1.dtb.read_accesses 337746 # DTB read accesses -system.cpu1.dtb.read_acv 23 # DTB read access violations -system.cpu1.dtb.read_hits 1301369 # DTB read hits -system.cpu1.dtb.read_misses 4766 # DTB read misses -system.cpu1.dtb.write_accesses 186652 # DTB write accesses -system.cpu1.dtb.write_acv 37 # DTB write access violations -system.cpu1.dtb.write_hits 757553 # DTB write hits -system.cpu1.dtb.write_misses 497 # DTB write misses -system.cpu1.idle_fraction 0.997638 # Percentage of idle cycles -system.cpu1.itb.accesses 1711917 # ITB accesses -system.cpu1.itb.acv 23 # ITB acv -system.cpu1.itb.hits 1709682 # ITB hits -system.cpu1.itb.misses 2235 # ITB misses -system.cpu1.kern.callpal 25990 # number of callpals executed +system.cpu0.kern.mode_switch_good_idle <err: div-0> # fraction of useful protection mode switches +system.cpu0.kern.mode_ticks_kernel 3716512331 99.96% 99.96% # number of ticks spent at the given mode +system.cpu0.kern.mode_ticks_user 1642961 0.04% 100.00% # number of ticks spent at the given mode +system.cpu0.kern.mode_ticks_idle 0 0.00% 100.00% # number of ticks spent at the given mode +system.cpu0.kern.swap_context 3792 # number of times the context was actually changed +system.cpu0.kern.syscall 199 # number of syscalls executed +system.cpu0.kern.syscall_fork 8 4.02% 4.02% # number of syscalls executed +system.cpu0.kern.syscall_read 17 8.54% 12.56% # number of syscalls executed +system.cpu0.kern.syscall_write 4 2.01% 14.57% # number of syscalls executed +system.cpu0.kern.syscall_close 29 14.57% 29.15% # number of syscalls executed +system.cpu0.kern.syscall_chdir 1 0.50% 29.65% # number of syscalls executed +system.cpu0.kern.syscall_obreak 4 2.01% 31.66% # number of syscalls executed +system.cpu0.kern.syscall_lseek 10 5.03% 36.68% # number of syscalls executed +system.cpu0.kern.syscall_getpid 6 3.02% 39.70% # number of syscalls executed +system.cpu0.kern.syscall_setuid 1 0.50% 40.20% # number of syscalls executed +system.cpu0.kern.syscall_getuid 3 1.51% 41.71% # number of syscalls executed +system.cpu0.kern.syscall_access 6 3.02% 44.72% # number of syscalls executed +system.cpu0.kern.syscall_dup 2 1.01% 45.73% # number of syscalls executed +system.cpu0.kern.syscall_open 31 15.58% 61.31% # number of syscalls executed +system.cpu0.kern.syscall_getgid 3 1.51% 62.81% # number of syscalls executed +system.cpu0.kern.syscall_sigprocmask 10 5.03% 67.84% # number of syscalls executed 
+system.cpu0.kern.syscall_ioctl 9 4.52% 72.36% # number of syscalls executed +system.cpu0.kern.syscall_execve 6 3.02% 75.38% # number of syscalls executed +system.cpu0.kern.syscall_mmap 20 10.05% 85.43% # number of syscalls executed +system.cpu0.kern.syscall_munmap 3 1.51% 86.93% # number of syscalls executed +system.cpu0.kern.syscall_mprotect 5 2.51% 89.45% # number of syscalls executed +system.cpu0.kern.syscall_gethostname 1 0.50% 89.95% # number of syscalls executed +system.cpu0.kern.syscall_dup2 3 1.51% 91.46% # number of syscalls executed +system.cpu0.kern.syscall_fcntl 8 4.02% 95.48% # number of syscalls executed +system.cpu0.kern.syscall_socket 2 1.01% 96.48% # number of syscalls executed +system.cpu0.kern.syscall_connect 2 1.01% 97.49% # number of syscalls executed +system.cpu0.kern.syscall_setgid 1 0.50% 97.99% # number of syscalls executed +system.cpu0.kern.syscall_getrlimit 2 1.01% 98.99% # number of syscalls executed +system.cpu0.kern.syscall_setsid 2 1.01% 100.00% # number of syscalls executed +system.cpu0.not_idle_fraction 0.015057 # Percentage of non-idle cycles +system.cpu0.numCycles 55984201 # number of cpu cycles simulated +system.cpu0.num_insts 55980548 # Number of instructions executed +system.cpu0.num_refs 15081320 # Number of memory references +system.cpu1.dtb.accesses 761000 # DTB accesses +system.cpu1.dtb.acv 32 # DTB access violations +system.cpu1.dtb.hits 2653187 # DTB hits +system.cpu1.dtb.misses 4173 # DTB misses +system.cpu1.dtb.read_accesses 523552 # DTB read accesses +system.cpu1.dtb.read_acv 0 # DTB read access violations +system.cpu1.dtb.read_hits 1675663 # DTB read hits +system.cpu1.dtb.read_misses 3798 # DTB read misses +system.cpu1.dtb.write_accesses 237448 # DTB write accesses +system.cpu1.dtb.write_acv 32 # DTB write access violations +system.cpu1.dtb.write_hits 977524 # DTB write hits +system.cpu1.dtb.write_misses 375 # DTB write misses +system.cpu1.idle_fraction 0.997598 # Percentage of idle cycles +system.cpu1.itb.accesses 1845187 # ITB accesses +system.cpu1.itb.acv 0 # ITB acv +system.cpu1.itb.hits 1843600 # ITB hits +system.cpu1.itb.misses 1587 # ITB misses +system.cpu1.kern.callpal 34405 # number of callpals executed system.cpu1.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed -system.cpu1.kern.callpal_wripir 8 0.03% 0.03% # number of callpals executed -system.cpu1.kern.callpal_wrmces 1 0.00% 0.04% # number of callpals executed -system.cpu1.kern.callpal_wrfen 1 0.00% 0.04% # number of callpals executed -system.cpu1.kern.callpal_swpctx 554 2.13% 2.17% # number of callpals executed -system.cpu1.kern.callpal_tbi 7 0.03% 2.20% # number of callpals executed -system.cpu1.kern.callpal_wrent 7 0.03% 2.23% # number of callpals executed -system.cpu1.kern.callpal_swpipl 22366 86.06% 88.28% # number of callpals executed -system.cpu1.kern.callpal_rdps 98 0.38% 88.66% # number of callpals executed -system.cpu1.kern.callpal_wrkgp 1 0.00% 88.66% # number of callpals executed -system.cpu1.kern.callpal_wrusp 4 0.02% 88.68% # number of callpals executed -system.cpu1.kern.callpal_rdusp 1 0.00% 88.68% # number of callpals executed -system.cpu1.kern.callpal_whami 3 0.01% 88.70% # number of callpals executed -system.cpu1.kern.callpal_rti 2613 10.05% 98.75% # number of callpals executed -system.cpu1.kern.callpal_callsys 208 0.80% 99.55% # number of callpals executed -system.cpu1.kern.callpal_imb 116 0.45% 100.00% # number of callpals executed +system.cpu1.kern.callpal_wripir 8 0.02% 0.03% # number of callpals executed +system.cpu1.kern.callpal_wrmces 1 
0.00% 0.03% # number of callpals executed +system.cpu1.kern.callpal_wrfen 1 0.00% 0.03% # number of callpals executed +system.cpu1.kern.callpal_swpctx 468 1.36% 1.39% # number of callpals executed +system.cpu1.kern.callpal_tbi 5 0.01% 1.41% # number of callpals executed +system.cpu1.kern.callpal_wrent 7 0.02% 1.43% # number of callpals executed +system.cpu1.kern.callpal_swpipl 28030 81.47% 82.90% # number of callpals executed +system.cpu1.kern.callpal_rdps 3042 8.84% 91.74% # number of callpals executed +system.cpu1.kern.callpal_wrkgp 1 0.00% 91.74% # number of callpals executed +system.cpu1.kern.callpal_wrusp 5 0.01% 91.76% # number of callpals executed +system.cpu1.kern.callpal_whami 3 0.01% 91.77% # number of callpals executed +system.cpu1.kern.callpal_rti 2586 7.52% 99.28% # number of callpals executed +system.cpu1.kern.callpal_callsys 187 0.54% 99.83% # number of callpals executed +system.cpu1.kern.callpal_imb 59 0.17% 100.00% # number of callpals executed system.cpu1.kern.callpal_rdunique 1 0.00% 100.00% # number of callpals executed system.cpu1.kern.inst.arm 0 # number of arm instructions executed -system.cpu1.kern.inst.hwrei 35475 # number of hwrei instructions executed +system.cpu1.kern.inst.hwrei 42209 # number of hwrei instructions executed system.cpu1.kern.inst.ivlb 0 # number of ivlb instructions executed system.cpu1.kern.inst.ivle 0 # number of ivle instructions executed -system.cpu1.kern.inst.quiesce 1946 # number of quiesce instructions executed -system.cpu1.kern.ipl_count 26882 # number of times we switched to this ipl -system.cpu1.kern.ipl_count_0 9636 35.85% 35.85% # number of times we switched to this ipl -system.cpu1.kern.ipl_count_22 5504 20.47% 56.32% # number of times we switched to this ipl -system.cpu1.kern.ipl_count_30 91 0.34% 56.66% # number of times we switched to this ipl -system.cpu1.kern.ipl_count_31 11651 43.34% 100.00% # number of times we switched to this ipl -system.cpu1.kern.ipl_good 26602 # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_0 9607 36.11% 36.11% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_22 5504 20.69% 56.80% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_30 91 0.34% 57.15% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_31 11400 42.85% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_ticks 3556060349 # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_0 3533823708 99.37% 99.37% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_22 1040434 0.03% 99.40% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_30 23860 0.00% 99.40% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_31 21172347 0.60% 100.00% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_used 0.989584 # fraction of swpipl calls that actually changed the ipl -system.cpu1.kern.ipl_used_0 0.996990 # fraction of swpipl calls that actually changed the ipl +system.cpu1.kern.inst.quiesce 2146 # number of quiesce instructions executed +system.cpu1.kern.ipl_count 32627 # number of times we switched to this ipl +system.cpu1.kern.ipl_count_0 11165 34.22% 34.22% # number of times we switched to this ipl +system.cpu1.kern.ipl_count_22 1895 5.81% 40.03% # number of times we switched to this ipl +system.cpu1.kern.ipl_count_30 115 0.35% 40.38% # number of times we switched to this ipl 
+system.cpu1.kern.ipl_count_31 19452 59.62% 100.00% # number of times we switched to this ipl +system.cpu1.kern.ipl_good 24195 # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_0 11150 46.08% 46.08% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_22 1895 7.83% 53.92% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_30 115 0.48% 54.39% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_31 11035 45.61% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_ticks 3717733449 # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_0 3695802393 99.41% 99.41% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_22 162970 0.00% 99.41% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_30 29122 0.00% 99.42% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_31 21738964 0.58% 100.00% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_used 0.741564 # fraction of swpipl calls that actually changed the ipl +system.cpu1.kern.ipl_used_0 0.998657 # fraction of swpipl calls that actually changed the ipl system.cpu1.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl system.cpu1.kern.ipl_used_30 1 # fraction of swpipl calls that actually changed the ipl -system.cpu1.kern.ipl_used_31 0.978457 # fraction of swpipl calls that actually changed the ipl -system.cpu1.kern.mode_good_kernel 691 -system.cpu1.kern.mode_good_user 692 -system.cpu1.kern.mode_good_idle 0 -system.cpu1.kern.mode_switch_kernel 3163 # number of protection mode switches -system.cpu1.kern.mode_switch_user 692 # number of protection mode switches -system.cpu1.kern.mode_switch_idle 0 # number of protection mode switches -system.cpu1.kern.mode_switch_good 0.358755 # fraction of useful protection mode switches -system.cpu1.kern.mode_switch_good_kernel 0.218463 # fraction of useful protection mode switches +system.cpu1.kern.ipl_used_31 0.567294 # fraction of swpipl calls that actually changed the ipl +system.cpu1.kern.mode_good_kernel 602 +system.cpu1.kern.mode_good_user 563 +system.cpu1.kern.mode_good_idle 39 +system.cpu1.kern.mode_switch_kernel 1011 # number of protection mode switches +system.cpu1.kern.mode_switch_user 563 # number of protection mode switches +system.cpu1.kern.mode_switch_idle 2045 # number of protection mode switches +system.cpu1.kern.mode_switch_good 0.332689 # fraction of useful protection mode switches +system.cpu1.kern.mode_switch_good_kernel 0.595450 # fraction of useful protection mode switches system.cpu1.kern.mode_switch_good_user 1 # fraction of useful protection mode switches -system.cpu1.kern.mode_switch_good_idle <err: div-0> # fraction of useful protection mode switches -system.cpu1.kern.mode_ticks_kernel 3554209770 99.95% 99.95% # number of ticks spent at the given mode -system.cpu1.kern.mode_ticks_user 1850577 0.05% 100.00% # number of ticks spent at the given mode -system.cpu1.kern.mode_ticks_idle 0 0.00% 100.00% # number of ticks spent at the given mode -system.cpu1.kern.swap_context 555 # number of times the context was actually changed -system.cpu1.kern.syscall 163 # number of syscalls executed -system.cpu1.kern.syscall_fork 1 0.61% 0.61% # number of syscalls executed -system.cpu1.kern.syscall_read 13 7.98% 8.59% # number of syscalls executed -system.cpu1.kern.syscall_write 1 0.61% 9.20% # number of syscalls executed 
-system.cpu1.kern.syscall_close 13 7.98% 17.18% # number of syscalls executed -system.cpu1.kern.syscall_obreak 18 11.04% 28.22% # number of syscalls executed -system.cpu1.kern.syscall_lseek 4 2.45% 30.67% # number of syscalls executed -system.cpu1.kern.syscall_getpid 2 1.23% 31.90% # number of syscalls executed -system.cpu1.kern.syscall_setuid 2 1.23% 33.13% # number of syscalls executed -system.cpu1.kern.syscall_getuid 4 2.45% 35.58% # number of syscalls executed -system.cpu1.kern.syscall_open 28 17.18% 52.76% # number of syscalls executed -system.cpu1.kern.syscall_getgid 4 2.45% 55.21% # number of syscalls executed -system.cpu1.kern.syscall_sigprocmask 2 1.23% 56.44% # number of syscalls executed -system.cpu1.kern.syscall_ioctl 3 1.84% 58.28% # number of syscalls executed -system.cpu1.kern.syscall_readlink 1 0.61% 58.90% # number of syscalls executed -system.cpu1.kern.syscall_execve 1 0.61% 59.51% # number of syscalls executed -system.cpu1.kern.syscall_pre_F64_stat 9 5.52% 65.03% # number of syscalls executed -system.cpu1.kern.syscall_mmap 27 16.56% 81.60% # number of syscalls executed -system.cpu1.kern.syscall_munmap 2 1.23% 82.82% # number of syscalls executed -system.cpu1.kern.syscall_mprotect 7 4.29% 87.12% # number of syscalls executed -system.cpu1.kern.syscall_gethostname 1 0.61% 87.73% # number of syscalls executed -system.cpu1.kern.syscall_dup2 1 0.61% 88.34% # number of syscalls executed -system.cpu1.kern.syscall_pre_F64_fstat 13 7.98% 96.32% # number of syscalls executed -system.cpu1.kern.syscall_fcntl 3 1.84% 98.16% # number of syscalls executed -system.cpu1.kern.syscall_setgid 2 1.23% 99.39% # number of syscalls executed -system.cpu1.kern.syscall_getrlimit 1 0.61% 100.00% # number of syscalls executed -system.cpu1.not_idle_fraction 0.002362 # Percentage of non-idle cycles -system.cpu1.numCycles 8398405 # number of cpu cycles simulated -system.cpu1.num_insts 8396147 # Number of instructions executed -system.cpu1.num_refs 2073144 # Number of memory references +system.cpu1.kern.mode_switch_good_idle 0.019071 # fraction of useful protection mode switches +system.cpu1.kern.mode_ticks_kernel 4713507 0.13% 0.13% # number of ticks spent at the given mode +system.cpu1.kern.mode_ticks_user 1950903 0.05% 0.18% # number of ticks spent at the given mode +system.cpu1.kern.mode_ticks_idle 3710606044 99.82% 100.00% # number of ticks spent at the given mode +system.cpu1.kern.swap_context 469 # number of times the context was actually changed +system.cpu1.kern.syscall 130 # number of syscalls executed +system.cpu1.kern.syscall_read 13 10.00% 10.00% # number of syscalls executed +system.cpu1.kern.syscall_close 14 10.77% 20.77% # number of syscalls executed +system.cpu1.kern.syscall_chmod 1 0.77% 21.54% # number of syscalls executed +system.cpu1.kern.syscall_obreak 11 8.46% 30.00% # number of syscalls executed +system.cpu1.kern.syscall_setuid 3 2.31% 32.31% # number of syscalls executed +system.cpu1.kern.syscall_getuid 3 2.31% 34.62% # number of syscalls executed +system.cpu1.kern.syscall_access 5 3.85% 38.46% # number of syscalls executed +system.cpu1.kern.syscall_open 24 18.46% 56.92% # number of syscalls executed +system.cpu1.kern.syscall_getgid 3 2.31% 59.23% # number of syscalls executed +system.cpu1.kern.syscall_ioctl 1 0.77% 60.00% # number of syscalls executed +system.cpu1.kern.syscall_readlink 1 0.77% 60.77% # number of syscalls executed +system.cpu1.kern.syscall_execve 1 0.77% 61.54% # number of syscalls executed +system.cpu1.kern.syscall_mmap 34 26.15% 87.69% # number of syscalls 
executed +system.cpu1.kern.syscall_mprotect 11 8.46% 96.15% # number of syscalls executed +system.cpu1.kern.syscall_fcntl 2 1.54% 97.69% # number of syscalls executed +system.cpu1.kern.syscall_setgid 3 2.31% 100.00% # number of syscalls executed +system.cpu1.not_idle_fraction 0.002402 # Percentage of non-idle cycles +system.cpu1.numCycles 8930639 # number of cpu cycles simulated +system.cpu1.num_insts 8929052 # Number of instructions executed +system.cpu1.num_refs 2665347 # Number of memory references system.disk0.dma_read_bytes 1024 # Number of bytes transfered via DMA reads (not PRD). system.disk0.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD). system.disk0.dma_read_txs 1 # Number of DMA read transactions (not PRD). -system.disk0.dma_write_bytes 2521088 # Number of bytes transfered via DMA writes. -system.disk0.dma_write_full_pages 285 # Number of full page size DMA writes. -system.disk0.dma_write_txs 375 # Number of DMA write transactions. +system.disk0.dma_write_bytes 2702336 # Number of bytes transfered via DMA writes. +system.disk0.dma_write_full_pages 302 # Number of full page size DMA writes. +system.disk0.dma_write_txs 408 # Number of DMA write transactions. system.disk2.dma_read_bytes 0 # Number of bytes transfered via DMA reads (not PRD). system.disk2.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD). system.disk2.dma_read_txs 0 # Number of DMA read transactions (not PRD). diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr index fe3ad68ab..14aa2c9ff 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr @@ -3,4 +3,4 @@ Listening for console connection on port 3456 0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001 warn: Entering event queue @ 0. Starting simulation... -warn: 195722: Trying to launch CPU number 1! +warn: 195723: Trying to launch CPU number 1! 
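The derived values in these m5stats.txt baselines are plain ratios of the raw counters printed beside them, so a quick arithmetic check catches transcription mistakes when rebaselining. A minimal sketch, assuming only the formulas the printed numbers imply (this is illustrative arithmetic, not the simulator's stats code):

    def ratio(num, den):
        # derived stats are simple ratios of two raw counters
        return num / float(den)

    # system.cpu.l2cache counters from the hello-2T-smt o3-timing run above
    accesses, misses, miss_latency = 961, 956, 1969
    print 'miss_rate        = %.6f' % ratio(misses, accesses)      # 0.994797
    print 'avg_miss_latency = %.6f' % ratio(miss_latency, misses)  # 2.059623
    print 'avg_refs         = %.6f' % ratio(5, 956)                # total_refs / sampled_refs = 0.005230

    # per-CPU busy fractions from the tsunami-simple-atomic-dual run above
    sim_ticks = 3718155709
    print 'cpu0 not_idle_fraction = %.6f' % ratio(55984201, sim_ticks)  # 0.015057
    print 'cpu1 not_idle_fraction = %.6f' % ratio(8930639, sim_ticks)   # 0.002402

Each printed value reproduces the corresponding figure in the statistics dumps above, which is a useful sanity check that the regenerated references are internally consistent.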
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout index c8330eef2..18365db1c 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout @@ -5,8 +5,8 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 5 2006 15:32:34 -M5 started Tue Sep 5 15:43:12 2006 -M5 executing on zizzer.eecs.umich.edu +M5 compiled Oct 8 2006 21:57:24 +M5 started Sun Oct 8 21:58:13 2006 +M5 executing on zed.eecs.umich.edu command line: build/ALPHA_FS/m5.opt -d build/ALPHA_FS/tests/opt/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual -Exiting @ tick 3556060806 because m5_exit instruction encountered +Exiting @ tick 3718155709 because m5_exit instruction encountered diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini index c017495f6..11b108837 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini @@ -58,6 +58,7 @@ mem_mode=atomic pal=/dist/m5/system/binaries/ts_osfpal physmem=system.physmem readfile=tests/halt.sh +symbolfile= system_rev=1024 system_type=34 @@ -74,7 +75,7 @@ side_b=system.membus.port[0] type=AtomicSimpleCPU children=dtb itb clock=1 -cpu_id=-1 +cpu_id=0 defer_registration=false dtb=system.cpu.dtb function_trace=false @@ -86,6 +87,7 @@ max_loads_all_threads=0 max_loads_any_thread=0 mem=system.physmem profile=0 +progress_interval=0 simulate_stalls=false system=system width=1 @@ -145,12 +147,16 @@ cpu=system.cpu [system.iobus] type=Bus bus_id=0 +clock=2 +width=64 default=system.tsunami.pciconfig.pio port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio system.tsunami.fake_sm_chip.pio system.tsunami.fake_uart1.pio system.tsunami.fake_uart2.pio system.tsunami.fake_uart3.pio system.tsunami.fake_uart4.pio system.tsunami.fake_ppc.pio system.tsunami.fake_OROM.pio system.tsunami.fake_pnp_addr.pio system.tsunami.fake_pnp_write.pio system.tsunami.fake_pnp_read0.pio system.tsunami.fake_pnp_read1.pio system.tsunami.fake_pnp_read2.pio system.tsunami.fake_pnp_read3.pio system.tsunami.fake_pnp_read4.pio system.tsunami.fake_pnp_read5.pio system.tsunami.fake_pnp_read6.pio system.tsunami.fake_pnp_read7.pio system.tsunami.fake_ata0.pio system.tsunami.fake_ata1.pio system.tsunami.fb.pio system.tsunami.io.pio system.tsunami.uart.pio system.tsunami.console.pio system.tsunami.ide.pio system.tsunami.ethernet.pio system.tsunami.ethernet.config system.tsunami.ethernet.dma system.tsunami.ide.config system.tsunami.ide.dma [system.membus] type=Bus bus_id=1 +clock=2 +width=64 port=system.bridge.side_b system.physmem.port system.cpu.icache_port system.cpu.dcache_port [system.physmem] @@ -554,6 +560,7 @@ pio=system.iobus.port[24] [trace] bufsize=0 +cycle=0 dump_on_exit=false file=cout flags= diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out index 018308862..e5c6e96f8 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out @@ -21,6 
+21,7 @@ console=/dist/m5/system/binaries/console pal=/dist/m5/system/binaries/ts_osfpal boot_osflags=root=/dev/hda1 console=ttyS0 readfile=tests/halt.sh +symbolfile= init_param=0 system_type=34 system_rev=1024 @@ -28,6 +29,8 @@ system_rev=1024 [system.membus] type=Bus bus_id=1 +clock=2 +width=64 [system.bridge] type=Bridge @@ -86,11 +89,12 @@ max_insts_any_thread=0 max_insts_all_threads=0 max_loads_any_thread=0 max_loads_all_threads=0 +progress_interval=0 mem=system.physmem system=system +cpu_id=0 itb=system.cpu.itb dtb=system.cpu.dtb -cpu_id=-1 profile=0 clock=1 defer_registration=false @@ -461,10 +465,13 @@ disks=system.disk0 system.disk2 [system.iobus] type=Bus bus_id=0 +clock=2 +width=64 [trace] flags= start=0 +cycle=0 bufsize=0 file=cout dump_on_exit=false @@ -508,6 +515,9 @@ trace_system=client [debug] break_cycles= +[statsreset] +reset_cycle=0 + [pseudo_inst] quiesce=true statistics=true diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/console.system.sim_console b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/console.system.sim_console index ea7a20777..5461cc4ab 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/console.system.sim_console +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/console.system.sim_console @@ -3,7 +3,7 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
memsize 8000000 pages 4000
First free page after ROM 0xFFFFFC0000018000
HWRPB 0xFFFFFC0000018000 l1pt 0xFFFFFC0000040000 l2pt 0xFFFFFC0000042000 l3pt_rpb 0xFFFFFC0000044000 l3pt_kernel 0xFFFFFC0000048000 l2reserv 0xFFFFFC0000046000
-kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC00008064E8, kentry = 0xFFFFFC0000310000, numCPUs = 0x1
+kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC0000855898, kentry = 0xFFFFFC0000310000, numCPUs = 0x1
CPU Clock at 2000 MHz IntrClockFrequency=1024
Booting with 1 processor(s)
KSP: 0x20043FE8 PTBR 0x20 @@ -14,28 +14,26 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
ConsoleDispatch at virt 10000658 phys 18658 val FFFFFC00000100A8
unix_boot_mem ends at FFFFFC0000076000
k_argc = 0
-jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1028)
-CallbackFixup 0 18000, t7=FFFFFC0000700000
-Linux version 2.6.8.1 (binkertn@ziff.eecs.umich.edu) (gcc version 3.4.3) #36 SMP Mon May 2 19:50:53 EDT 2005
+jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1067)
+CallbackFixup 0 18000, t7=FFFFFC000070C000
+Linux version 2.6.13 (hsul@zed.eecs.umich.edu) (gcc version 3.4.3) #1 SMP Sun Oct 8 19:52:07 EDT 2006
Booting GENERIC on Tsunami variation DP264 using machine vector DP264 from SRM
Major Options: SMP LEGACY_START VERBOSE_MCHECK
Command line: root=/dev/hda1 console=ttyS0
memcluster 0, usage 1, start 0, end 392
memcluster 1, usage 0, start 392, end 16384
-freeing pages 1030:16384
-reserving pages 1030:1031
+freeing pages 1069:16384
+reserving pages 1069:1070
SMP: 1 CPUs probed -- cpu_present_mask = 1
Built 1 zonelists
Kernel command line: root=/dev/hda1 console=ttyS0
-PID hash table entries: 1024 (order 10: 16384 bytes)
+PID hash table entries: 1024 (order: 10, 32768 bytes)
Using epoch = 1900
Console: colour dummy device 80x25
Dentry cache hash table entries: 32768 (order: 5, 262144 bytes)
Inode-cache hash table entries: 16384 (order: 4, 131072 bytes)
-Memory: 119072k/131072k available (3058k kernel code, 8680k reserved, 695k data, 480k init)
-Mount-cache hash table entries: 512 (order: 0, 8192 bytes)
-per-CPU timeslice cutoff: 374.49 usecs.
-task migration cache decay timeout: 0 msecs.
+Memory: 118784k/131072k available (3314k kernel code, 8952k reserved, 983k data, 224k init)
+Mount-cache hash table entries: 512
SMP mode deactivated.
Brought up 1 CPUs
SMP: Total of 1 processors activated (4002.20 BogoMIPS). @@ -48,16 +46,21 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
Initializing Cryptographic API
rtc: Standard PC (1900) epoch (1900) detected
Real Time Clock Driver v1.12
-Serial: 8250/16550 driver $Revision: 1.90 $ 5 ports, IRQ sharing disabled
+Serial: 8250/16550 driver $Revision: 1.90 $ 1 ports, IRQ sharing disabled
ttyS0 at I/O 0x3f8 (irq = 4) is a 8250
+io scheduler noop registered
+io scheduler anticipatory registered
+io scheduler deadline registered
+io scheduler cfq registered
loop: loaded (max 8 devices)
-Using anticipatory io scheduler
nbd: registered device at major 43
-sinic.c: M5 Simple Integrated NIC driver
ns83820.c: National Semiconductor DP83820 10/100/1000 driver.
eth0: ns83820.c: 0x22c: 00000000, subsystem: 0000:0000
eth0: enabling optical transceiver
-eth0: ns83820 v0.20: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=sg
+eth0: using 64 bit addressing.
+eth0: ns83820 v0.22: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=h,sg
+tun: Universal TUN/TAP device driver, 1.6
+tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2
ide: Assuming 33MHz system bus speed for PIO modes; override with idebus=xx
PIIX4: IDE controller at PCI slot 0000:00:00.0 @@ -69,25 +72,24 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
hdb: M5 IDE Disk, ATA DISK drive
ide0 at 0x8410-0x8417,0x8422 on irq 31
hda: max request size: 128KiB
-hda: 163296 sectors (83 MB), CHS=162/16/63, UDMA(33)
+hda: 511056 sectors (261 MB), CHS=507/16/63, UDMA(33)
+hda: cache flushes not supported
hda: hda1
hdb: max request size: 128KiB
hdb: 4177920 sectors (2139 MB), CHS=4144/16/63, UDMA(33)
+hdb: cache flushes not supported
hdb: unknown partition table
-scsi0 : scsi_m5, version 1.73 [20040518], dev_size_mb=8, opts=0x0
-Vendor: Linux Model: scsi_m5 Li Rev: 0004
-Type: Direct-Access ANSI SCSI revision: 03
-SCSI device sda: 16384 512-byte hdwr sectors (8 MB)
-SCSI device sda: drive cache: write back
-sda: unknown partition table
-Attached scsi disk sda at scsi0, channel 0, id 0, lun 0
mice: PS/2 mouse device common for all mice
NET: Registered protocol family 2
-IP: routing cache hash table of 1024 buckets, 16Kbytes
-TCP: Hash tables configured (established 8192 bind 8192)
-ip_conntrack version 2.1 (512 buckets, 4096 max) - 440 bytes per conntrack
+IP route cache hash table entries: 4096 (order: 2, 32768 bytes)
+TCP established hash table entries: 16384 (order: 5, 262144 bytes)
+TCP bind hash table entries: 16384 (order: 5, 262144 bytes)
+TCP: Hash tables configured (established 16384 bind 16384)
+TCP reno registered
+ip_conntrack version 2.1 (512 buckets, 4096 max) - 296 bytes per conntrack
ip_tables: (C) 2000-2002 Netfilter core team
arp_tables: (C) 2002 David S. Miller
+TCP bic registered
Initializing IPsec netlink socket
NET: Registered protocol family 1
NET: Registered protocol family 17 @@ -96,11 +98,7 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
802.1Q VLAN Support v1.8 Ben Greear <greearb@candelatech.com>
All bugs added by David S. Miller <davem@redhat.com>
VFS: Mounted root (ext2 filesystem) readonly.
-Freeing unused kernel memory: 480k freed
-init started: BusyBox v1.00-rc2 (2004.11.18-16:22+0000) multi-call binary
-
-PTXdist-0.7.0 (2004-11-18T11:23:40-0500)
-
+Freeing unused kernel memory: 224k freed
+init started: BusyBox v1.1.0 (2006.08.17-02:54+0000) multi-call binary
mounting filesystems...
-EXT2-fs warning: checktime reached, running e2fsck is recommended
loading script... +loading script... diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt index 3a7dc1cd4..e276e91a7 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt @@ -1,130 +1,127 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 1346129 # Simulator instruction rate (inst/s) -host_mem_usage 194392 # Number of bytes of host memory used -host_seconds 44.52 # Real time elapsed on the host -host_tick_rate 78470813 # Simulator tick rate (ticks/s) +host_inst_rate 1389289 # Simulator instruction rate (inst/s) +host_mem_usage 197652 # Number of bytes of host memory used +host_seconds 44.48 # Real time elapsed on the host +host_tick_rate 81712411 # Simulator tick rate (ticks/s) sim_freq 2000000000 # Frequency of simulated ticks -sim_insts 59929520 # Number of instructions simulated -sim_seconds 1.746773 # Number of seconds simulated -sim_ticks 3493545624 # Number of ticks simulated -system.cpu.dtb.accesses 2354955 # DTB accesses -system.cpu.dtb.acv 413 # DTB access violations -system.cpu.dtb.hits 13929995 # DTB hits -system.cpu.dtb.misses 16187 # DTB misses -system.cpu.dtb.read_accesses 832415 # DTB read accesses -system.cpu.dtb.read_acv 242 # DTB read access violations -system.cpu.dtb.read_hits 7718636 # DTB read hits -system.cpu.dtb.read_misses 13695 # DTB read misses -system.cpu.dtb.write_accesses 1522540 # DTB write accesses -system.cpu.dtb.write_acv 171 # DTB write access violations -system.cpu.dtb.write_hits 6211359 # DTB write hits -system.cpu.dtb.write_misses 2492 # DTB write misses -system.cpu.idle_fraction 0.982844 # Percentage of idle cycles -system.cpu.itb.accesses 4037380 # ITB accesses -system.cpu.itb.acv 239 # ITB acv -system.cpu.itb.hits 4030656 # ITB hits -system.cpu.itb.misses 6724 # ITB misses -system.cpu.kern.callpal 184022 # number of callpals executed +sim_insts 61788439 # Number of instructions simulated +sim_seconds 1.817090 # Number of seconds simulated +sim_ticks 3634179176 # Number of ticks simulated +system.cpu.dtb.accesses 1304494 # DTB accesses +system.cpu.dtb.acv 367 # DTB access violations +system.cpu.dtb.hits 16552094 # DTB hits +system.cpu.dtb.misses 11425 # DTB misses +system.cpu.dtb.read_accesses 900425 # DTB read accesses +system.cpu.dtb.read_acv 210 # DTB read access violations +system.cpu.dtb.read_hits 10038384 # DTB read hits +system.cpu.dtb.read_misses 10280 # DTB read misses +system.cpu.dtb.write_accesses 404069 # DTB write accesses +system.cpu.dtb.write_acv 157 # DTB write access violations +system.cpu.dtb.write_hits 6513710 # DTB write hits +system.cpu.dtb.write_misses 1145 # DTB write misses +system.cpu.idle_fraction 0.982997 # Percentage of idle cycles +system.cpu.itb.accesses 3281310 # ITB accesses +system.cpu.itb.acv 184 # ITB acv +system.cpu.itb.hits 3276320 # ITB hits +system.cpu.itb.misses 4990 # ITB misses +system.cpu.kern.callpal 193842 # number of callpals executed system.cpu.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed system.cpu.kern.callpal_wrmces 1 0.00% 0.00% # number of callpals executed system.cpu.kern.callpal_wrfen 1 0.00% 0.00% # number of callpals executed system.cpu.kern.callpal_wrvptptr 1 0.00% 0.00% # number of callpals executed -system.cpu.kern.callpal_swpctx 1864 1.01% 1.02% # number of callpals executed -system.cpu.kern.callpal_tbi 28 0.02% 1.03% # number of 
callpals executed -system.cpu.kern.callpal_wrent 7 0.00% 1.03% # number of callpals executed -system.cpu.kern.callpal_swpipl 172016 93.48% 94.51% # number of callpals executed -system.cpu.kern.callpal_rdps 4808 2.61% 97.12% # number of callpals executed -system.cpu.kern.callpal_wrkgp 1 0.00% 97.12% # number of callpals executed -system.cpu.kern.callpal_wrusp 8 0.00% 97.13% # number of callpals executed -system.cpu.kern.callpal_rdusp 12 0.01% 97.13% # number of callpals executed -system.cpu.kern.callpal_whami 2 0.00% 97.14% # number of callpals executed -system.cpu.kern.callpal_rti 4291 2.33% 99.47% # number of callpals executed -system.cpu.kern.callpal_callsys 667 0.36% 99.83% # number of callpals executed -system.cpu.kern.callpal_imb 314 0.17% 100.00% # number of callpals executed +system.cpu.kern.callpal_swpctx 4203 2.17% 2.17% # number of callpals executed +system.cpu.kern.callpal_tbi 54 0.03% 2.20% # number of callpals executed +system.cpu.kern.callpal_wrent 7 0.00% 2.20% # number of callpals executed +system.cpu.kern.callpal_swpipl 176751 91.18% 93.38% # number of callpals executed +system.cpu.kern.callpal_rdps 6881 3.55% 96.93% # number of callpals executed +system.cpu.kern.callpal_wrkgp 1 0.00% 96.94% # number of callpals executed +system.cpu.kern.callpal_wrusp 7 0.00% 96.94% # number of callpals executed +system.cpu.kern.callpal_rdusp 9 0.00% 96.94% # number of callpals executed +system.cpu.kern.callpal_whami 2 0.00% 96.94% # number of callpals executed +system.cpu.kern.callpal_rti 5211 2.69% 99.63% # number of callpals executed +system.cpu.kern.callpal_callsys 531 0.27% 99.91% # number of callpals executed +system.cpu.kern.callpal_imb 181 0.09% 100.00% # number of callpals executed system.cpu.kern.inst.arm 0 # number of arm instructions executed -system.cpu.kern.inst.hwrei 209657 # number of hwrei instructions executed +system.cpu.kern.inst.hwrei 212908 # number of hwrei instructions executed system.cpu.kern.inst.ivlb 0 # number of ivlb instructions executed system.cpu.kern.inst.ivle 0 # number of ivle instructions executed -system.cpu.kern.inst.quiesce 1868 # number of quiesce instructions executed -system.cpu.kern.ipl_count 178378 # number of times we switched to this ipl -system.cpu.kern.ipl_count_0 75463 42.31% 42.31% # number of times we switched to this ipl -system.cpu.kern.ipl_count_21 286 0.16% 42.47% # number of times we switched to this ipl -system.cpu.kern.ipl_count_22 5446 3.05% 45.52% # number of times we switched to this ipl -system.cpu.kern.ipl_count_31 97183 54.48% 100.00% # number of times we switched to this ipl -system.cpu.kern.ipl_good 160188 # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_0 75397 47.07% 47.07% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_21 286 0.18% 47.25% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_22 5446 3.40% 50.65% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_31 79059 49.35% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_ticks 3493545167 # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_0 3471576124 99.37% 99.37% # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_21 45785 0.00% 99.37% # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_22 934362 0.03% 99.40% # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_31 20988896 0.60% 100.00% # number of cycles we 
spent at this ipl -system.cpu.kern.ipl_used 0.898026 # fraction of swpipl calls that actually changed the ipl -system.cpu.kern.ipl_used_0 0.999125 # fraction of swpipl calls that actually changed the ipl +system.cpu.kern.inst.quiesce 6207 # number of quiesce instructions executed +system.cpu.kern.ipl_count 184061 # number of times we switched to this ipl +system.cpu.kern.ipl_count_0 75348 40.94% 40.94% # number of times we switched to this ipl +system.cpu.kern.ipl_count_21 245 0.13% 41.07% # number of times we switched to this ipl +system.cpu.kern.ipl_count_22 1853 1.01% 42.08% # number of times we switched to this ipl +system.cpu.kern.ipl_count_31 106615 57.92% 100.00% # number of times we switched to this ipl +system.cpu.kern.ipl_good 150060 # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_good_0 73981 49.30% 49.30% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_good_21 245 0.16% 49.46% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_good_22 1853 1.23% 50.70% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_good_31 73981 49.30% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_ticks 3634178761 # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_0 3599646819 99.05% 99.05% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_21 40474 0.00% 99.05% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_22 159358 0.00% 99.06% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_31 34332110 0.94% 100.00% # number of cycles we spent at this ipl +system.cpu.kern.ipl_used 0.815273 # fraction of swpipl calls that actually changed the ipl +system.cpu.kern.ipl_used_0 0.981858 # fraction of swpipl calls that actually changed the ipl system.cpu.kern.ipl_used_21 1 # fraction of swpipl calls that actually changed the ipl system.cpu.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl -system.cpu.kern.ipl_used_31 0.813506 # fraction of swpipl calls that actually changed the ipl -system.cpu.kern.mode_good_kernel 2342 -system.cpu.kern.mode_good_user 2171 -system.cpu.kern.mode_good_idle 171 -system.cpu.kern.mode_switch_kernel 4092 # number of protection mode switches -system.cpu.kern.mode_switch_user 2171 # number of protection mode switches -system.cpu.kern.mode_switch_idle 2041 # number of protection mode switches -system.cpu.kern.mode_switch_good 0.564066 # fraction of useful protection mode switches -system.cpu.kern.mode_switch_good_kernel 0.572336 # fraction of useful protection mode switches +system.cpu.kern.ipl_used_31 0.693908 # fraction of swpipl calls that actually changed the ipl +system.cpu.kern.mode_good_kernel 1938 +system.cpu.kern.mode_good_user 1758 +system.cpu.kern.mode_good_idle 180 +system.cpu.kern.mode_switch_kernel 5978 # number of protection mode switches +system.cpu.kern.mode_switch_user 1758 # number of protection mode switches +system.cpu.kern.mode_switch_idle 2102 # number of protection mode switches +system.cpu.kern.mode_switch_good 0.393983 # fraction of useful protection mode switches +system.cpu.kern.mode_switch_good_kernel 0.324189 # fraction of useful protection mode switches system.cpu.kern.mode_switch_good_user 1 # fraction of useful protection mode switches -system.cpu.kern.mode_switch_good_idle 0.083782 # fraction of useful protection mode switches -system.cpu.kern.mode_ticks_kernel 33028385 0.95% 0.95% # number 
of ticks spent at the given mode -system.cpu.kern.mode_ticks_user 4450361 0.13% 1.07% # number of ticks spent at the given mode -system.cpu.kern.mode_ticks_idle 3456066419 98.93% 100.00% # number of ticks spent at the given mode -system.cpu.kern.swap_context 1865 # number of times the context was actually changed -system.cpu.kern.syscall 475 # number of syscalls executed -system.cpu.kern.syscall_fork 10 2.11% 2.11% # number of syscalls executed -system.cpu.kern.syscall_read 33 6.95% 9.05% # number of syscalls executed -system.cpu.kern.syscall_write 7 1.47% 10.53% # number of syscalls executed -system.cpu.kern.syscall_close 49 10.32% 20.84% # number of syscalls executed -system.cpu.kern.syscall_chdir 1 0.21% 21.05% # number of syscalls executed -system.cpu.kern.syscall_chmod 1 0.21% 21.26% # number of syscalls executed -system.cpu.kern.syscall_obreak 44 9.26% 30.53% # number of syscalls executed -system.cpu.kern.syscall_lseek 13 2.74% 33.26% # number of syscalls executed -system.cpu.kern.syscall_getpid 10 2.11% 35.37% # number of syscalls executed -system.cpu.kern.syscall_setuid 4 0.84% 36.21% # number of syscalls executed -system.cpu.kern.syscall_getuid 8 1.68% 37.89% # number of syscalls executed -system.cpu.kern.syscall_access 4 0.84% 38.74% # number of syscalls executed -system.cpu.kern.syscall_dup 4 0.84% 39.58% # number of syscalls executed -system.cpu.kern.syscall_open 68 14.32% 53.89% # number of syscalls executed -system.cpu.kern.syscall_getgid 8 1.68% 55.58% # number of syscalls executed -system.cpu.kern.syscall_sigprocmask 14 2.95% 58.53% # number of syscalls executed -system.cpu.kern.syscall_ioctl 16 3.37% 61.89% # number of syscalls executed -system.cpu.kern.syscall_readlink 2 0.42% 62.32% # number of syscalls executed -system.cpu.kern.syscall_execve 8 1.68% 64.00% # number of syscalls executed -system.cpu.kern.syscall_pre_F64_stat 31 6.53% 70.53% # number of syscalls executed -system.cpu.kern.syscall_pre_F64_lstat 1 0.21% 70.74% # number of syscalls executed -system.cpu.kern.syscall_mmap 55 11.58% 82.32% # number of syscalls executed -system.cpu.kern.syscall_munmap 6 1.26% 83.58% # number of syscalls executed -system.cpu.kern.syscall_mprotect 14 2.95% 86.53% # number of syscalls executed -system.cpu.kern.syscall_gethostname 2 0.42% 86.95% # number of syscalls executed -system.cpu.kern.syscall_dup2 4 0.84% 87.79% # number of syscalls executed -system.cpu.kern.syscall_pre_F64_fstat 28 5.89% 93.68% # number of syscalls executed -system.cpu.kern.syscall_fcntl 14 2.95% 96.63% # number of syscalls executed -system.cpu.kern.syscall_socket 3 0.63% 97.26% # number of syscalls executed -system.cpu.kern.syscall_connect 3 0.63% 97.89% # number of syscalls executed -system.cpu.kern.syscall_setgid 4 0.84% 98.74% # number of syscalls executed -system.cpu.kern.syscall_getrlimit 3 0.63% 99.37% # number of syscalls executed -system.cpu.kern.syscall_setsid 3 0.63% 100.00% # number of syscalls executed -system.cpu.not_idle_fraction 0.017156 # Percentage of non-idle cycles -system.cpu.numCycles 59936483 # number of cpu cycles simulated -system.cpu.num_insts 59929520 # Number of instructions executed -system.cpu.num_refs 13982880 # Number of memory references +system.cpu.kern.mode_switch_good_idle 0.085633 # fraction of useful protection mode switches +system.cpu.kern.mode_ticks_kernel 54682435 1.50% 1.50% # number of ticks spent at the given mode +system.cpu.kern.mode_ticks_user 3591244 0.10% 1.60% # number of ticks spent at the given mode +system.cpu.kern.mode_ticks_idle 3575905080 98.40% 100.00% 
# number of ticks spent at the given mode +system.cpu.kern.swap_context 4204 # number of times the context was actually changed +system.cpu.kern.syscall 329 # number of syscalls executed +system.cpu.kern.syscall_fork 8 2.43% 2.43% # number of syscalls executed +system.cpu.kern.syscall_read 30 9.12% 11.55% # number of syscalls executed +system.cpu.kern.syscall_write 4 1.22% 12.77% # number of syscalls executed +system.cpu.kern.syscall_close 43 13.07% 25.84% # number of syscalls executed +system.cpu.kern.syscall_chdir 1 0.30% 26.14% # number of syscalls executed +system.cpu.kern.syscall_chmod 1 0.30% 26.44% # number of syscalls executed +system.cpu.kern.syscall_obreak 15 4.56% 31.00% # number of syscalls executed +system.cpu.kern.syscall_lseek 10 3.04% 34.04% # number of syscalls executed +system.cpu.kern.syscall_getpid 6 1.82% 35.87% # number of syscalls executed +system.cpu.kern.syscall_setuid 4 1.22% 37.08% # number of syscalls executed +system.cpu.kern.syscall_getuid 6 1.82% 38.91% # number of syscalls executed +system.cpu.kern.syscall_access 11 3.34% 42.25% # number of syscalls executed +system.cpu.kern.syscall_dup 2 0.61% 42.86% # number of syscalls executed +system.cpu.kern.syscall_open 55 16.72% 59.57% # number of syscalls executed +system.cpu.kern.syscall_getgid 6 1.82% 61.40% # number of syscalls executed +system.cpu.kern.syscall_sigprocmask 10 3.04% 64.44% # number of syscalls executed +system.cpu.kern.syscall_ioctl 10 3.04% 67.48% # number of syscalls executed +system.cpu.kern.syscall_readlink 1 0.30% 67.78% # number of syscalls executed +system.cpu.kern.syscall_execve 7 2.13% 69.91% # number of syscalls executed +system.cpu.kern.syscall_mmap 54 16.41% 86.32% # number of syscalls executed +system.cpu.kern.syscall_munmap 3 0.91% 87.23% # number of syscalls executed +system.cpu.kern.syscall_mprotect 16 4.86% 92.10% # number of syscalls executed +system.cpu.kern.syscall_gethostname 1 0.30% 92.40% # number of syscalls executed +system.cpu.kern.syscall_dup2 3 0.91% 93.31% # number of syscalls executed +system.cpu.kern.syscall_fcntl 10 3.04% 96.35% # number of syscalls executed +system.cpu.kern.syscall_socket 2 0.61% 96.96% # number of syscalls executed +system.cpu.kern.syscall_connect 2 0.61% 97.57% # number of syscalls executed +system.cpu.kern.syscall_setgid 4 1.22% 98.78% # number of syscalls executed +system.cpu.kern.syscall_getrlimit 2 0.61% 99.39% # number of syscalls executed +system.cpu.kern.syscall_setsid 2 0.61% 100.00% # number of syscalls executed +system.cpu.not_idle_fraction 0.017003 # Percentage of non-idle cycles +system.cpu.numCycles 61793613 # number of cpu cycles simulated +system.cpu.num_insts 61788439 # Number of instructions executed +system.cpu.num_refs 16800623 # Number of memory references system.disk0.dma_read_bytes 1024 # Number of bytes transfered via DMA reads (not PRD). system.disk0.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD). system.disk0.dma_read_txs 1 # Number of DMA read transactions (not PRD). -system.disk0.dma_write_bytes 2521088 # Number of bytes transfered via DMA writes. -system.disk0.dma_write_full_pages 285 # Number of full page size DMA writes. -system.disk0.dma_write_txs 375 # Number of DMA write transactions. +system.disk0.dma_write_bytes 2702336 # Number of bytes transfered via DMA writes. +system.disk0.dma_write_full_pages 302 # Number of full page size DMA writes. +system.disk0.dma_write_txs 408 # Number of DMA write transactions. system.disk2.dma_read_bytes 0 # Number of bytes transfered via DMA reads (not PRD). 
system.disk2.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD). system.disk2.dma_read_txs 0 # Number of DMA read transactions (not PRD). diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout index c04cd5050..bb7f4ca1e 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout @@ -5,8 +5,8 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 5 2006 15:32:34 -M5 started Tue Sep 5 15:42:26 2006 -M5 executing on zizzer.eecs.umich.edu +M5 compiled Oct 8 2006 21:57:24 +M5 started Sun Oct 8 21:57:28 2006 +M5 executing on zed.eecs.umich.edu command line: build/ALPHA_FS/m5.opt -d build/ALPHA_FS/tests/opt/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic -Exiting @ tick 3493545624 because m5_exit instruction encountered +Exiting @ tick 3634179176 because m5_exit instruction encountered diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini index 97e9007e7..9976e053a 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini @@ -58,6 +58,7 @@ mem_mode=timing pal=/dist/m5/system/binaries/ts_osfpal physmem=system.physmem readfile=tests/halt.sh +symbolfile= system_rev=1024 system_type=34 @@ -74,7 +75,7 @@ side_b=system.membus.port[0] type=TimingSimpleCPU children=dtb itb clock=1 -cpu_id=-1 +cpu_id=0 defer_registration=false dtb=system.cpu0.dtb function_trace=false @@ -86,6 +87,7 @@ max_loads_all_threads=0 max_loads_any_thread=0 mem=system.physmem profile=0 +progress_interval=0 system=system dcache_port=system.membus.port[3] icache_port=system.membus.port[2] @@ -102,7 +104,7 @@ size=48 type=TimingSimpleCPU children=dtb itb clock=1 -cpu_id=-1 +cpu_id=1 defer_registration=false dtb=system.cpu1.dtb function_trace=false @@ -114,6 +116,7 @@ max_loads_all_threads=0 max_loads_any_thread=0 mem=system.physmem profile=0 +progress_interval=0 system=system dcache_port=system.membus.port[5] icache_port=system.membus.port[4] @@ -171,12 +174,16 @@ cpu=system.cpu0 [system.iobus] type=Bus bus_id=0 +clock=2 +width=64 default=system.tsunami.pciconfig.pio port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio system.tsunami.fake_sm_chip.pio system.tsunami.fake_uart1.pio system.tsunami.fake_uart2.pio system.tsunami.fake_uart3.pio system.tsunami.fake_uart4.pio system.tsunami.fake_ppc.pio system.tsunami.fake_OROM.pio system.tsunami.fake_pnp_addr.pio system.tsunami.fake_pnp_write.pio system.tsunami.fake_pnp_read0.pio system.tsunami.fake_pnp_read1.pio system.tsunami.fake_pnp_read2.pio system.tsunami.fake_pnp_read3.pio system.tsunami.fake_pnp_read4.pio system.tsunami.fake_pnp_read5.pio system.tsunami.fake_pnp_read6.pio system.tsunami.fake_pnp_read7.pio system.tsunami.fake_ata0.pio system.tsunami.fake_ata1.pio system.tsunami.fb.pio system.tsunami.io.pio system.tsunami.uart.pio system.tsunami.console.pio system.tsunami.ide.pio system.tsunami.ethernet.pio system.tsunami.ethernet.config system.tsunami.ethernet.dma system.tsunami.ide.config system.tsunami.ide.dma [system.membus] type=Bus bus_id=1 +clock=2 +width=64 port=system.bridge.side_b system.physmem.port 
system.cpu0.icache_port system.cpu0.dcache_port system.cpu1.icache_port system.cpu1.dcache_port [system.physmem] @@ -580,6 +587,7 @@ pio=system.iobus.port[24] [trace] bufsize=0 +cycle=0 dump_on_exit=false file=cout flags= diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out index 96c734e15..9e4bfb566 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out @@ -21,6 +21,7 @@ console=/dist/m5/system/binaries/console pal=/dist/m5/system/binaries/ts_osfpal boot_osflags=root=/dev/hda1 console=ttyS0 readfile=tests/halt.sh +symbolfile= init_param=0 system_type=34 system_rev=1024 @@ -28,6 +29,8 @@ system_rev=1024 [system.membus] type=Bus bus_id=1 +clock=2 +width=64 [system.bridge] type=Bridge @@ -86,11 +89,12 @@ max_insts_any_thread=0 max_insts_all_threads=0 max_loads_any_thread=0 max_loads_all_threads=0 +progress_interval=0 mem=system.physmem system=system +cpu_id=0 itb=system.cpu0.itb dtb=system.cpu0.dtb -cpu_id=-1 profile=0 clock=1 defer_registration=false @@ -113,11 +117,12 @@ max_insts_any_thread=0 max_insts_all_threads=0 max_loads_any_thread=0 max_loads_all_threads=0 +progress_interval=0 mem=system.physmem system=system +cpu_id=1 itb=system.cpu1.itb dtb=system.cpu1.dtb -cpu_id=-1 profile=0 clock=1 defer_registration=false @@ -488,10 +493,13 @@ disks=system.disk0 system.disk2 [system.iobus] type=Bus bus_id=0 +clock=2 +width=64 [trace] flags= start=0 +cycle=0 bufsize=0 file=cout dump_on_exit=false @@ -535,6 +543,9 @@ trace_system=client [debug] break_cycles= +[statsreset] +reset_cycle=0 + [pseudo_inst] quiesce=true statistics=true diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/console.system.sim_console b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/console.system.sim_console index c3c7b2676..27adebb82 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/console.system.sim_console +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/console.system.sim_console @@ -3,7 +3,7 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
memsize 8000000 pages 4000
First free page after ROM 0xFFFFFC0000018000
HWRPB 0xFFFFFC0000018000 l1pt 0xFFFFFC0000040000 l2pt 0xFFFFFC0000042000 l3pt_rpb 0xFFFFFC0000044000 l3pt_kernel 0xFFFFFC0000048000 l2reserv 0xFFFFFC0000046000
-kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC00008064E8, kentry = 0xFFFFFC0000310000, numCPUs = 0x2
+kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC0000855898, kentry = 0xFFFFFC0000310000, numCPUs = 0x2
CPU Clock at 2000 MHz IntrClockFrequency=1024
Booting with 2 processor(s)
KSP: 0x20043FE8 PTBR 0x20 @@ -16,29 +16,27 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
Bootstraping CPU 1 with sp=0xFFFFFC0000076000
unix_boot_mem ends at FFFFFC0000078000
k_argc = 0
-jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1028)
-CallbackFixup 0 18000, t7=FFFFFC0000700000
+jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1067)
+CallbackFixup 0 18000, t7=FFFFFC000070C000
Entering slaveloop for cpu 1 my_rpb=FFFFFC0000018400
-Linux version 2.6.8.1 (binkertn@ziff.eecs.umich.edu) (gcc version 3.4.3) #36 SMP Mon May 2 19:50:53 EDT 2005
+Linux version 2.6.13 (hsul@zed.eecs.umich.edu) (gcc version 3.4.3) #1 SMP Sun Oct 8 19:52:07 EDT 2006
Booting GENERIC on Tsunami variation DP264 using machine vector DP264 from SRM
Major Options: SMP LEGACY_START VERBOSE_MCHECK
Command line: root=/dev/hda1 console=ttyS0
memcluster 0, usage 1, start 0, end 392
memcluster 1, usage 0, start 392, end 16384
-freeing pages 1030:16384
-reserving pages 1030:1031
+freeing pages 1069:16384
+reserving pages 1069:1070
SMP: 2 CPUs probed -- cpu_present_mask = 3
Built 1 zonelists
Kernel command line: root=/dev/hda1 console=ttyS0
-PID hash table entries: 1024 (order 10: 16384 bytes)
+PID hash table entries: 1024 (order: 10, 32768 bytes)
Using epoch = 1900
Console: colour dummy device 80x25
Dentry cache hash table entries: 32768 (order: 5, 262144 bytes)
Inode-cache hash table entries: 16384 (order: 4, 131072 bytes)
-Memory: 119072k/131072k available (3058k kernel code, 8680k reserved, 695k data, 480k init)
-Mount-cache hash table entries: 512 (order: 0, 8192 bytes)
-per-CPU timeslice cutoff: 374.49 usecs.
-task migration cache decay timeout: 0 msecs.
+Memory: 118784k/131072k available (3314k kernel code, 8952k reserved, 983k data, 224k init)
+Mount-cache hash table entries: 512
SMP starting up secondaries.
Slave CPU 1 console command START SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb FFFFFC0000018400 my_rpb_phys 18400 @@ -53,16 +51,21 @@ SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb
Initializing Cryptographic API
rtc: Standard PC (1900) epoch (1900) detected
Real Time Clock Driver v1.12
-Serial: 8250/16550 driver $Revision: 1.90 $ 5 ports, IRQ sharing disabled
+Serial: 8250/16550 driver $Revision: 1.90 $ 1 ports, IRQ sharing disabled
ttyS0 at I/O 0x3f8 (irq = 4) is a 8250
+io scheduler noop registered
+io scheduler anticipatory registered
+io scheduler deadline registered
+io scheduler cfq registered
loop: loaded (max 8 devices)
-Using anticipatory io scheduler
nbd: registered device at major 43
-sinic.c: M5 Simple Integrated NIC driver
ns83820.c: National Semiconductor DP83820 10/100/1000 driver.
eth0: ns83820.c: 0x22c: 00000000, subsystem: 0000:0000
eth0: enabling optical transceiver
-eth0: ns83820 v0.20: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=sg
+eth0: using 64 bit addressing.
+eth0: ns83820 v0.22: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=h,sg
+tun: Universal TUN/TAP device driver, 1.6
+tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2
ide: Assuming 33MHz system bus speed for PIO modes; override with idebus=xx
PIIX4: IDE controller at PCI slot 0000:00:00.0 @@ -74,25 +77,24 @@ SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb
hdb: M5 IDE Disk, ATA DISK drive
ide0 at 0x8410-0x8417,0x8422 on irq 31
hda: max request size: 128KiB
-hda: 163296 sectors (83 MB), CHS=162/16/63, UDMA(33)
+hda: 511056 sectors (261 MB), CHS=507/16/63, UDMA(33)
+hda: cache flushes not supported
hda: hda1
hdb: max request size: 128KiB
hdb: 4177920 sectors (2139 MB), CHS=4144/16/63, UDMA(33)
+hdb: cache flushes not supported
hdb: unknown partition table
-scsi0 : scsi_m5, version 1.73 [20040518], dev_size_mb=8, opts=0x0
-Vendor: Linux Model: scsi_m5 Li Rev: 0004
-Type: Direct-Access ANSI SCSI revision: 03
-SCSI device sda: 16384 512-byte hdwr sectors (8 MB)
-SCSI device sda: drive cache: write back
-sda: unknown partition table
-Attached scsi disk sda at scsi0, channel 0, id 0, lun 0
mice: PS/2 mouse device common for all mice
NET: Registered protocol family 2
-IP: routing cache hash table of 1024 buckets, 16Kbytes
-TCP: Hash tables configured (established 8192 bind 8192)
-ip_conntrack version 2.1 (512 buckets, 4096 max) - 440 bytes per conntrack
+IP route cache hash table entries: 4096 (order: 2, 32768 bytes)
+TCP established hash table entries: 16384 (order: 5, 262144 bytes)
+TCP bind hash table entries: 16384 (order: 5, 262144 bytes)
+TCP: Hash tables configured (established 16384 bind 16384)
+TCP reno registered
+ip_conntrack version 2.1 (512 buckets, 4096 max) - 296 bytes per conntrack
ip_tables: (C) 2000-2002 Netfilter core team
arp_tables: (C) 2002 David S. Miller
+TCP bic registered
Initializing IPsec netlink socket
NET: Registered protocol family 1
NET: Registered protocol family 17 @@ -101,11 +103,7 @@ SlaveCmd: restart FFFFFC0000310020 FFFFFC0000310020 vptb FFFFFFFE00000000 my_rpb
802.1Q VLAN Support v1.8 Ben Greear <greearb@candelatech.com>
All bugs added by David S. Miller <davem@redhat.com>
VFS: Mounted root (ext2 filesystem) readonly.
-Freeing unused kernel memory: 480k freed
-init started: BusyBox v1.00-rc2 (2004.11.18-16:22+0000) multi-call binary
-
-PTXdist-0.7.0 (2004-11-18T11:23:40-0500)
-
+Freeing unused kernel memory: 224k freed
+init started: BusyBox v1.1.0 (2006.08.17-02:54+0000) multi-call binary
mounting filesystems...
-EXT2-fs warning: checktime reached, running e2fsck is recommended
loading script... +loading script... diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt index 666766e20..3f540d0ea 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt @@ -1,239 +1,231 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 804715 # Simulator instruction rate (inst/s) -host_mem_usage 194628 # Number of bytes of host memory used -host_seconds 78.40 # Real time elapsed on the host -host_tick_rate 45146741 # Simulator tick rate (ticks/s) +host_inst_rate 255147 # Simulator instruction rate (inst/s) +host_mem_usage 198260 # Number of bytes of host memory used +host_seconds 260.00 # Real time elapsed on the host +host_tick_rate 14365182 # Simulator tick rate (ticks/s) sim_freq 2000000000 # Frequency of simulated ticks -sim_insts 63088076 # Number of instructions simulated -sim_seconds 1.769718 # Number of seconds simulated -sim_ticks 3539435029 # Number of ticks simulated -system.cpu0.dtb.accesses 1831687 # DTB accesses -system.cpu0.dtb.acv 360 # DTB access violations -system.cpu0.dtb.hits 10286150 # DTB hits -system.cpu0.dtb.misses 11050 # DTB misses -system.cpu0.dtb.read_accesses 495437 # DTB read accesses -system.cpu0.dtb.read_acv 219 # DTB read access violations -system.cpu0.dtb.read_hits 5741423 # DTB read hits -system.cpu0.dtb.read_misses 9036 # DTB read misses -system.cpu0.dtb.write_accesses 1336250 # DTB write accesses -system.cpu0.dtb.write_acv 141 # DTB write access violations -system.cpu0.dtb.write_hits 4544727 # DTB write hits -system.cpu0.dtb.write_misses 2014 # DTB write misses -system.cpu0.idle_fraction 0.984526 # Percentage of idle cycles -system.cpu0.itb.accesses 2328068 # ITB accesses -system.cpu0.itb.acv 216 # ITB acv -system.cpu0.itb.hits 2323500 # ITB hits -system.cpu0.itb.misses 4568 # ITB misses -system.cpu0.kern.callpal 145575 # number of callpals executed +sim_insts 66337257 # Number of instructions simulated +sim_seconds 1.867449 # Number of seconds simulated +sim_ticks 3734898877 # Number of ticks simulated +system.cpu0.dtb.accesses 828318 # DTB accesses +system.cpu0.dtb.acv 315 # DTB access violations +system.cpu0.dtb.hits 13264910 # DTB hits +system.cpu0.dtb.misses 7094 # DTB misses +system.cpu0.dtb.read_accesses 572336 # DTB read accesses +system.cpu0.dtb.read_acv 200 # DTB read access violations +system.cpu0.dtb.read_hits 8201218 # DTB read hits +system.cpu0.dtb.read_misses 6394 # DTB read misses +system.cpu0.dtb.write_accesses 255982 # DTB write accesses +system.cpu0.dtb.write_acv 115 # DTB write access violations +system.cpu0.dtb.write_hits 5063692 # DTB write hits +system.cpu0.dtb.write_misses 700 # DTB write misses +system.cpu0.idle_fraction 0.982517 # Percentage of idle cycles +system.cpu0.itb.accesses 1888651 # ITB accesses +system.cpu0.itb.acv 166 # ITB acv +system.cpu0.itb.hits 1885318 # ITB hits +system.cpu0.itb.misses 3333 # ITB misses +system.cpu0.kern.callpal 146863 # number of callpals executed system.cpu0.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed -system.cpu0.kern.callpal_wripir 45 0.03% 0.03% # number of callpals executed -system.cpu0.kern.callpal_wrmces 1 0.00% 0.03% # number of callpals executed -system.cpu0.kern.callpal_wrfen 1 0.00% 0.03% # number of callpals executed -system.cpu0.kern.callpal_wrvptptr 1 0.00% 0.03% # number of callpals executed 
-system.cpu0.kern.callpal_swpctx 1334 0.92% 0.95% # number of callpals executed -system.cpu0.kern.callpal_tbi 20 0.01% 0.96% # number of callpals executed -system.cpu0.kern.callpal_wrent 7 0.00% 0.97% # number of callpals executed -system.cpu0.kern.callpal_swpipl 135235 92.90% 93.87% # number of callpals executed -system.cpu0.kern.callpal_rdps 4594 3.16% 97.02% # number of callpals executed -system.cpu0.kern.callpal_wrkgp 1 0.00% 97.02% # number of callpals executed -system.cpu0.kern.callpal_wrusp 4 0.00% 97.02% # number of callpals executed -system.cpu0.kern.callpal_rdusp 11 0.01% 97.03% # number of callpals executed -system.cpu0.kern.callpal_whami 2 0.00% 97.03% # number of callpals executed -system.cpu0.kern.callpal_rti 3660 2.51% 99.55% # number of callpals executed -system.cpu0.kern.callpal_callsys 461 0.32% 99.86% # number of callpals executed -system.cpu0.kern.callpal_imb 197 0.14% 100.00% # number of callpals executed +system.cpu0.kern.callpal_wripir 506 0.34% 0.35% # number of callpals executed +system.cpu0.kern.callpal_wrmces 1 0.00% 0.35% # number of callpals executed +system.cpu0.kern.callpal_wrfen 1 0.00% 0.35% # number of callpals executed +system.cpu0.kern.callpal_wrvptptr 1 0.00% 0.35% # number of callpals executed +system.cpu0.kern.callpal_swpctx 2962 2.02% 2.36% # number of callpals executed +system.cpu0.kern.callpal_tbi 47 0.03% 2.40% # number of callpals executed +system.cpu0.kern.callpal_wrent 7 0.00% 2.40% # number of callpals executed +system.cpu0.kern.callpal_swpipl 132443 90.18% 92.58% # number of callpals executed +system.cpu0.kern.callpal_rdps 6236 4.25% 96.83% # number of callpals executed +system.cpu0.kern.callpal_wrkgp 1 0.00% 96.83% # number of callpals executed +system.cpu0.kern.callpal_wrusp 2 0.00% 96.83% # number of callpals executed +system.cpu0.kern.callpal_rdusp 8 0.01% 96.84% # number of callpals executed +system.cpu0.kern.callpal_whami 2 0.00% 96.84% # number of callpals executed +system.cpu0.kern.callpal_rti 4200 2.86% 99.70% # number of callpals executed +system.cpu0.kern.callpal_callsys 317 0.22% 99.91% # number of callpals executed +system.cpu0.kern.callpal_imb 128 0.09% 100.00% # number of callpals executed system.cpu0.kern.inst.arm 0 # number of arm instructions executed -system.cpu0.kern.inst.hwrei 163916 # number of hwrei instructions executed +system.cpu0.kern.inst.hwrei 160332 # number of hwrei instructions executed system.cpu0.kern.inst.ivlb 0 # number of ivlb instructions executed system.cpu0.kern.inst.ivle 0 # number of ivle instructions executed -system.cpu0.kern.inst.quiesce 1952 # number of quiesce instructions executed -system.cpu0.kern.ipl_count 141041 # number of times we switched to this ipl -system.cpu0.kern.ipl_count_0 56950 40.38% 40.38% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_21 286 0.20% 40.58% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_22 5513 3.91% 44.49% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_30 52 0.04% 44.53% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_31 78240 55.47% 100.00% # number of times we switched to this ipl -system.cpu0.kern.ipl_good 123339 # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_0 56917 46.15% 46.15% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_21 286 0.23% 46.38% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_22 5513 4.47% 50.85% # number of times we switched to 
this ipl from a different ipl -system.cpu0.kern.ipl_good_30 52 0.04% 50.89% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_31 60571 49.11% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_ticks 3539063979 # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_0 3513499166 99.28% 99.28% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_21 60705 0.00% 99.28% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_22 1354114 0.04% 99.32% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_30 18748 0.00% 99.32% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_31 24131246 0.68% 100.00% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_used 0.874490 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.ipl_used_0 0.999421 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.inst.quiesce 6637 # number of quiesce instructions executed +system.cpu0.kern.ipl_count 139203 # number of times we switched to this ipl +system.cpu0.kern.ipl_count_0 55744 40.05% 40.05% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_21 245 0.18% 40.22% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_22 1904 1.37% 41.59% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_30 410 0.29% 41.88% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_31 80900 58.12% 100.00% # number of times we switched to this ipl +system.cpu0.kern.ipl_good 112527 # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_0 55189 49.05% 49.05% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_21 245 0.22% 49.26% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_22 1904 1.69% 50.95% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_30 410 0.36% 51.32% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_31 54779 48.68% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_ticks 3734378988 # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_0 3696326531 98.98% 98.98% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_21 53683 0.00% 98.98% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_22 224672 0.01% 98.99% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_30 128286 0.00% 98.99% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_31 37645816 1.01% 100.00% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_used 0.808366 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.ipl_used_0 0.990044 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_21 1 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_30 1 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.ipl_used_31 0.774169 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.mode_good_kernel 1632 -system.cpu0.kern.mode_good_user 1487 -system.cpu0.kern.mode_good_idle 145 -system.cpu0.kern.mode_switch_kernel 2857 # number of protection mode switches 
-system.cpu0.kern.mode_switch_user 1487 # number of protection mode switches -system.cpu0.kern.mode_switch_idle 2125 # number of protection mode switches -system.cpu0.kern.mode_switch_good 0.504560 # fraction of useful protection mode switches -system.cpu0.kern.mode_switch_good_kernel 0.571229 # fraction of useful protection mode switches +system.cpu0.kern.ipl_used_31 0.677120 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.mode_good_kernel 1095 +system.cpu0.kern.mode_good_user 1095 +system.cpu0.kern.mode_good_idle 0 +system.cpu0.kern.mode_switch_kernel 6628 # number of protection mode switches +system.cpu0.kern.mode_switch_user 1095 # number of protection mode switches +system.cpu0.kern.mode_switch_idle 0 # number of protection mode switches +system.cpu0.kern.mode_switch_good 0.283569 # fraction of useful protection mode switches +system.cpu0.kern.mode_switch_good_kernel 0.165208 # fraction of useful protection mode switches system.cpu0.kern.mode_switch_good_user 1 # fraction of useful protection mode switches -system.cpu0.kern.mode_switch_good_idle 0.068235 # fraction of useful protection mode switches -system.cpu0.kern.mode_ticks_kernel 23634401 0.67% 0.67% # number of ticks spent at the given mode -system.cpu0.kern.mode_ticks_user 3241731 0.09% 0.76% # number of ticks spent at the given mode -system.cpu0.kern.mode_ticks_idle 3511854943 99.24% 100.00% # number of ticks spent at the given mode -system.cpu0.kern.swap_context 1335 # number of times the context was actually changed -system.cpu0.kern.syscall 312 # number of syscalls executed -system.cpu0.kern.syscall_fork 9 2.88% 2.88% # number of syscalls executed -system.cpu0.kern.syscall_read 20 6.41% 9.29% # number of syscalls executed -system.cpu0.kern.syscall_write 6 1.92% 11.22% # number of syscalls executed -system.cpu0.kern.syscall_close 36 11.54% 22.76% # number of syscalls executed -system.cpu0.kern.syscall_chdir 1 0.32% 23.08% # number of syscalls executed -system.cpu0.kern.syscall_chmod 1 0.32% 23.40% # number of syscalls executed -system.cpu0.kern.syscall_obreak 26 8.33% 31.73% # number of syscalls executed -system.cpu0.kern.syscall_lseek 9 2.88% 34.62% # number of syscalls executed -system.cpu0.kern.syscall_getpid 8 2.56% 37.18% # number of syscalls executed -system.cpu0.kern.syscall_setuid 2 0.64% 37.82% # number of syscalls executed -system.cpu0.kern.syscall_getuid 4 1.28% 39.10% # number of syscalls executed -system.cpu0.kern.syscall_access 4 1.28% 40.38% # number of syscalls executed -system.cpu0.kern.syscall_dup 4 1.28% 41.67% # number of syscalls executed -system.cpu0.kern.syscall_open 40 12.82% 54.49% # number of syscalls executed -system.cpu0.kern.syscall_getgid 4 1.28% 55.77% # number of syscalls executed -system.cpu0.kern.syscall_sigprocmask 12 3.85% 59.62% # number of syscalls executed -system.cpu0.kern.syscall_ioctl 13 4.17% 63.78% # number of syscalls executed -system.cpu0.kern.syscall_readlink 1 0.32% 64.10% # number of syscalls executed -system.cpu0.kern.syscall_execve 7 2.24% 66.35% # number of syscalls executed -system.cpu0.kern.syscall_pre_F64_stat 22 7.05% 73.40% # number of syscalls executed -system.cpu0.kern.syscall_pre_F64_lstat 1 0.32% 73.72% # number of syscalls executed -system.cpu0.kern.syscall_mmap 28 8.97% 82.69% # number of syscalls executed -system.cpu0.kern.syscall_munmap 4 1.28% 83.97% # number of syscalls executed -system.cpu0.kern.syscall_mprotect 7 2.24% 86.22% # number of syscalls executed -system.cpu0.kern.syscall_gethostname 1 0.32% 86.54% # number of syscalls 
executed
-system.cpu0.kern.syscall_dup2 3 0.96% 87.50% # number of syscalls executed
-system.cpu0.kern.syscall_pre_F64_fstat 15 4.81% 92.31% # number of syscalls executed
-system.cpu0.kern.syscall_fcntl 11 3.53% 95.83% # number of syscalls executed
-system.cpu0.kern.syscall_socket 3 0.96% 96.79% # number of syscalls executed
-system.cpu0.kern.syscall_connect 3 0.96% 97.76% # number of syscalls executed
-system.cpu0.kern.syscall_setgid 2 0.64% 98.40% # number of syscalls executed
-system.cpu0.kern.syscall_getrlimit 2 0.64% 99.04% # number of syscalls executed
-system.cpu0.kern.syscall_setsid 3 0.96% 100.00% # number of syscalls executed
-system.cpu0.not_idle_fraction 0.015474 # Percentage of non-idle cycles
-system.cpu0.numCycles 0 # number of cpu cycles simulated
-system.cpu0.num_insts 44447414 # Number of instructions executed
-system.cpu0.num_refs 10321518 # Number of memory references
-system.cpu1.dtb.accesses 524398 # DTB accesses
-system.cpu1.dtb.acv 60 # DTB access violations
-system.cpu1.dtb.hits 4612716 # DTB hits
-system.cpu1.dtb.misses 5263 # DTB misses
-system.cpu1.dtb.read_accesses 337746 # DTB read accesses
-system.cpu1.dtb.read_acv 23 # DTB read access violations
-system.cpu1.dtb.read_hits 2649302 # DTB read hits
-system.cpu1.dtb.read_misses 4766 # DTB read misses
-system.cpu1.dtb.write_accesses 186652 # DTB write accesses
-system.cpu1.dtb.write_acv 37 # DTB write access violations
-system.cpu1.dtb.write_hits 1963414 # DTB write hits
-system.cpu1.dtb.write_misses 497 # DTB write misses
-system.cpu1.idle_fraction 0.993423 # Percentage of idle cycles
-system.cpu1.itb.accesses 1711918 # ITB accesses
-system.cpu1.itb.acv 23 # ITB acv
-system.cpu1.itb.hits 1709683 # ITB hits
-system.cpu1.itb.misses 2235 # ITB misses
-system.cpu1.kern.callpal 58341 # number of callpals executed
+system.cpu0.kern.mode_switch_good_idle <err: div-0> # fraction of useful protection mode switches
+system.cpu0.kern.mode_ticks_kernel 3730042316 99.93% 99.93% # number of ticks spent at the given mode
+system.cpu0.kern.mode_ticks_user 2718822 0.07% 100.00% # number of ticks spent at the given mode
+system.cpu0.kern.mode_ticks_idle 0 0.00% 100.00% # number of ticks spent at the given mode
+system.cpu0.kern.swap_context 2963 # number of times the context was actually changed
+system.cpu0.kern.syscall 179 # number of syscalls executed
+system.cpu0.kern.syscall_fork 7 3.91% 3.91% # number of syscalls executed
+system.cpu0.kern.syscall_read 14 7.82% 11.73% # number of syscalls executed
+system.cpu0.kern.syscall_write 4 2.23% 13.97% # number of syscalls executed
+system.cpu0.kern.syscall_close 27 15.08% 29.05% # number of syscalls executed
+system.cpu0.kern.syscall_chdir 1 0.56% 29.61% # number of syscalls executed
+system.cpu0.kern.syscall_obreak 6 3.35% 32.96% # number of syscalls executed
+system.cpu0.kern.syscall_lseek 7 3.91% 36.87% # number of syscalls executed
+system.cpu0.kern.syscall_getpid 4 2.23% 39.11% # number of syscalls executed
+system.cpu0.kern.syscall_setuid 1 0.56% 39.66% # number of syscalls executed
+system.cpu0.kern.syscall_getuid 3 1.68% 41.34% # number of syscalls executed
+system.cpu0.kern.syscall_access 6 3.35% 44.69% # number of syscalls executed
+system.cpu0.kern.syscall_dup 2 1.12% 45.81% # number of syscalls executed
+system.cpu0.kern.syscall_open 30 16.76% 62.57% # number of syscalls executed
+system.cpu0.kern.syscall_getgid 3 1.68% 64.25% # number of syscalls executed
+system.cpu0.kern.syscall_sigprocmask 8 4.47% 68.72% # number of syscalls executed
+system.cpu0.kern.syscall_ioctl 8 4.47% 73.18% # number of syscalls executed
+system.cpu0.kern.syscall_execve 5 2.79% 75.98% # number of syscalls executed
+system.cpu0.kern.syscall_mmap 17 9.50% 85.47% # number of syscalls executed
+system.cpu0.kern.syscall_munmap 3 1.68% 87.15% # number of syscalls executed
+system.cpu0.kern.syscall_mprotect 4 2.23% 89.39% # number of syscalls executed
+system.cpu0.kern.syscall_gethostname 1 0.56% 89.94% # number of syscalls executed
+system.cpu0.kern.syscall_dup2 2 1.12% 91.06% # number of syscalls executed
+system.cpu0.kern.syscall_fcntl 8 4.47% 95.53% # number of syscalls executed
+system.cpu0.kern.syscall_socket 2 1.12% 96.65% # number of syscalls executed
+system.cpu0.kern.syscall_connect 2 1.12% 97.77% # number of syscalls executed
+system.cpu0.kern.syscall_setgid 1 0.56% 98.32% # number of syscalls executed
+system.cpu0.kern.syscall_getrlimit 1 0.56% 98.88% # number of syscalls executed
+system.cpu0.kern.syscall_setsid 2 1.12% 100.00% # number of syscalls executed
+system.cpu0.not_idle_fraction 0.017483 # Percentage of non-idle cycles
+system.cpu0.numCycles 3734379018 # number of cpu cycles simulated
+system.cpu0.num_insts 51973218 # Number of instructions executed
+system.cpu0.num_refs 13496062 # Number of memory references
+system.cpu1.dtb.accesses 477041 # DTB accesses
+system.cpu1.dtb.acv 52 # DTB access violations
+system.cpu1.dtb.hits 4561390 # DTB hits
+system.cpu1.dtb.misses 4359 # DTB misses
+system.cpu1.dtb.read_accesses 328551 # DTB read accesses
+system.cpu1.dtb.read_acv 10 # DTB read access violations
+system.cpu1.dtb.read_hits 2657400 # DTB read hits
+system.cpu1.dtb.read_misses 3911 # DTB read misses
+system.cpu1.dtb.write_accesses 148490 # DTB write accesses
+system.cpu1.dtb.write_acv 42 # DTB write access violations
+system.cpu1.dtb.write_hits 1903990 # DTB write hits
+system.cpu1.dtb.write_misses 448 # DTB write misses
+system.cpu1.idle_fraction 0.994927 # Percentage of idle cycles
+system.cpu1.itb.accesses 1392687 # ITB accesses
+system.cpu1.itb.acv 18 # ITB acv
+system.cpu1.itb.hits 1391015 # ITB hits
+system.cpu1.itb.misses 1672 # ITB misses
+system.cpu1.kern.callpal 74370 # number of callpals executed
system.cpu1.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed
-system.cpu1.kern.callpal_wripir 52 0.09% 0.09% # number of callpals executed
-system.cpu1.kern.callpal_wrmces 1 0.00% 0.09% # number of callpals executed
-system.cpu1.kern.callpal_wrfen 1 0.00% 0.09% # number of callpals executed
-system.cpu1.kern.callpal_swpctx 588 1.01% 1.10% # number of callpals executed
-system.cpu1.kern.callpal_tbi 7 0.01% 1.11% # number of callpals executed
-system.cpu1.kern.callpal_wrent 7 0.01% 1.13% # number of callpals executed
-system.cpu1.kern.callpal_swpipl 54562 93.52% 94.65% # number of callpals executed
-system.cpu1.kern.callpal_rdps 217 0.37% 95.02% # number of callpals executed
-system.cpu1.kern.callpal_wrkgp 1 0.00% 95.02% # number of callpals executed
-system.cpu1.kern.callpal_wrusp 4 0.01% 95.03% # number of callpals executed
-system.cpu1.kern.callpal_rdusp 1 0.00% 95.03% # number of callpals executed
-system.cpu1.kern.callpal_whami 3 0.01% 95.04% # number of callpals executed
-system.cpu1.kern.callpal_rti 2571 4.41% 99.44% # number of callpals executed
-system.cpu1.kern.callpal_callsys 208 0.36% 99.80% # number of callpals executed
-system.cpu1.kern.callpal_imb 116 0.20% 100.00% # number of callpals executed
+system.cpu1.kern.callpal_wripir 410 0.55% 0.55% # number of callpals executed
+system.cpu1.kern.callpal_wrmces 1 0.00% 0.55% # number of callpals executed
+system.cpu1.kern.callpal_wrfen 1 0.00% 0.56% # number of callpals executed
+system.cpu1.kern.callpal_swpctx 2102 2.83% 3.38% # number of callpals executed
+system.cpu1.kern.callpal_tbi 6 0.01% 3.39% # number of callpals executed
+system.cpu1.kern.callpal_wrent 7 0.01% 3.40% # number of callpals executed
+system.cpu1.kern.callpal_swpipl 65072 87.50% 90.90% # number of callpals executed
+system.cpu1.kern.callpal_rdps 2603 3.50% 94.40% # number of callpals executed
+system.cpu1.kern.callpal_wrkgp 1 0.00% 94.40% # number of callpals executed
+system.cpu1.kern.callpal_wrusp 5 0.01% 94.41% # number of callpals executed
+system.cpu1.kern.callpal_rdusp 1 0.00% 94.41% # number of callpals executed
+system.cpu1.kern.callpal_whami 3 0.00% 94.41% # number of callpals executed
+system.cpu1.kern.callpal_rti 3890 5.23% 99.64% # number of callpals executed
+system.cpu1.kern.callpal_callsys 214 0.29% 99.93% # number of callpals executed
+system.cpu1.kern.callpal_imb 52 0.07% 100.00% # number of callpals executed
system.cpu1.kern.callpal_rdunique 1 0.00% 100.00% # number of callpals executed
system.cpu1.kern.inst.arm 0 # number of arm instructions executed
-system.cpu1.kern.inst.hwrei 67770 # number of hwrei instructions executed
+system.cpu1.kern.inst.hwrei 82881 # number of hwrei instructions executed
system.cpu1.kern.inst.ivlb 0 # number of ivlb instructions executed
system.cpu1.kern.inst.ivle 0 # number of ivle instructions executed
-system.cpu1.kern.inst.quiesce 1892 # number of quiesce instructions executed
-system.cpu1.kern.ipl_count 58980 # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_0 25467 43.18% 43.18% # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_22 5476 9.28% 52.46% # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_30 45 0.08% 52.54% # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_31 27992 47.46% 100.00% # number of times we switched to this ipl
-system.cpu1.kern.ipl_good 58199 # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_0 25424 43.68% 43.68% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_22 5476 9.41% 53.09% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_30 45 0.08% 53.17% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_31 27254 46.83% 100.00% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_ticks 3539434499 # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_0 3510645847 99.19% 99.19% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_22 1415637 0.04% 99.23% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_30 16792 0.00% 99.23% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_31 27356223 0.77% 100.00% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_used 0.986758 # fraction of swpipl calls that actually changed the ipl
-system.cpu1.kern.ipl_used_0 0.998312 # fraction of swpipl calls that actually changed the ipl
+system.cpu1.kern.inst.quiesce 2511 # number of quiesce instructions executed
+system.cpu1.kern.ipl_count 71371 # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_0 27750 38.88% 38.88% # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_22 1902 2.66% 41.55% # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_30 506 0.71% 42.26% # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_31 41213 57.74% 100.00% # number of times we switched to this ipl
+system.cpu1.kern.ipl_good 55758 # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_0 26928 48.29% 48.29% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_22 1902 3.41% 51.71% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_30 506 0.91% 52.61% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_31 26422 47.39% 100.00% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_ticks 3734898431 # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_0 3704872588 99.20% 99.20% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_22 224436 0.01% 99.20% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_30 162482 0.00% 99.21% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_31 29638925 0.79% 100.00% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_used 0.781242 # fraction of swpipl calls that actually changed the ipl
+system.cpu1.kern.ipl_used_0 0.970378 # fraction of swpipl calls that actually changed the ipl
system.cpu1.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl
system.cpu1.kern.ipl_used_30 1 # fraction of swpipl calls that actually changed the ipl
-system.cpu1.kern.ipl_used_31 0.973635 # fraction of swpipl calls that actually changed the ipl
-system.cpu1.kern.mode_good_kernel 690
-system.cpu1.kern.mode_good_user 691
-system.cpu1.kern.mode_good_idle 0
-system.cpu1.kern.mode_switch_kernel 3141 # number of protection mode switches
-system.cpu1.kern.mode_switch_user 691 # number of protection mode switches
-system.cpu1.kern.mode_switch_idle 0 # number of protection mode switches
-system.cpu1.kern.mode_switch_good 0.360386 # fraction of useful protection mode switches
-system.cpu1.kern.mode_switch_good_kernel 0.219675 # fraction of useful protection mode switches
+system.cpu1.kern.ipl_used_31 0.641108 # fraction of swpipl calls that actually changed the ipl
+system.cpu1.kern.mode_good_kernel 1093
+system.cpu1.kern.mode_good_user 662
+system.cpu1.kern.mode_good_idle 431
+system.cpu1.kern.mode_switch_kernel 2354 # number of protection mode switches
+system.cpu1.kern.mode_switch_user 662 # number of protection mode switches
+system.cpu1.kern.mode_switch_idle 2830 # number of protection mode switches
+system.cpu1.kern.mode_switch_good 0.373931 # fraction of useful protection mode switches
+system.cpu1.kern.mode_switch_good_kernel 0.464316 # fraction of useful protection mode switches
system.cpu1.kern.mode_switch_good_user 1 # fraction of useful protection mode switches
-system.cpu1.kern.mode_switch_good_idle <err: div-0> # fraction of useful protection mode switches
-system.cpu1.kern.mode_ticks_kernel 3537141786 99.94% 99.94% # number of ticks spent at the given mode
-system.cpu1.kern.mode_ticks_user 2292711 0.06% 100.00% # number of ticks spent at the given mode
-system.cpu1.kern.mode_ticks_idle 0 0.00% 100.00% # number of ticks spent at the given mode
-system.cpu1.kern.swap_context 589 # number of times the context was actually changed
-system.cpu1.kern.syscall 163 # number of syscalls executed
-system.cpu1.kern.syscall_fork 1 0.61% 0.61% # number of syscalls executed
-system.cpu1.kern.syscall_read 13 7.98% 8.59% # number of syscalls executed
-system.cpu1.kern.syscall_write 1 0.61% 9.20% # number of syscalls executed
-system.cpu1.kern.syscall_close 13 7.98% 17.18% # number of syscalls executed
-system.cpu1.kern.syscall_obreak 18 11.04% 28.22% # number of syscalls executed
-system.cpu1.kern.syscall_lseek 4 2.45% 30.67% # number of syscalls executed
-system.cpu1.kern.syscall_getpid 2 1.23% 31.90% # number of syscalls executed
-system.cpu1.kern.syscall_setuid 2 1.23% 33.13% # number of syscalls executed
-system.cpu1.kern.syscall_getuid 4 2.45% 35.58% # number of syscalls executed
-system.cpu1.kern.syscall_open 28 17.18% 52.76% # number of syscalls executed
-system.cpu1.kern.syscall_getgid 4 2.45% 55.21% # number of syscalls executed
-system.cpu1.kern.syscall_sigprocmask 2 1.23% 56.44% # number of syscalls executed
-system.cpu1.kern.syscall_ioctl 3 1.84% 58.28% # number of syscalls executed
-system.cpu1.kern.syscall_readlink 1 0.61% 58.90% # number of syscalls executed
-system.cpu1.kern.syscall_execve 1 0.61% 59.51% # number of syscalls executed
-system.cpu1.kern.syscall_pre_F64_stat 9 5.52% 65.03% # number of syscalls executed
-system.cpu1.kern.syscall_mmap 27 16.56% 81.60% # number of syscalls executed
-system.cpu1.kern.syscall_munmap 2 1.23% 82.82% # number of syscalls executed
-system.cpu1.kern.syscall_mprotect 7 4.29% 87.12% # number of syscalls executed
-system.cpu1.kern.syscall_gethostname 1 0.61% 87.73% # number of syscalls executed
-system.cpu1.kern.syscall_dup2 1 0.61% 88.34% # number of syscalls executed
-system.cpu1.kern.syscall_pre_F64_fstat 13 7.98% 96.32% # number of syscalls executed
-system.cpu1.kern.syscall_fcntl 3 1.84% 98.16% # number of syscalls executed
-system.cpu1.kern.syscall_setgid 2 1.23% 99.39% # number of syscalls executed
-system.cpu1.kern.syscall_getrlimit 1 0.61% 100.00% # number of syscalls executed
-system.cpu1.not_idle_fraction 0.006577 # Percentage of non-idle cycles
-system.cpu1.numCycles 0 # number of cpu cycles simulated
-system.cpu1.num_insts 18640662 # Number of instructions executed
-system.cpu1.num_refs 4633112 # Number of memory references
+system.cpu1.kern.mode_switch_good_idle 0.152297 # fraction of useful protection mode switches
+system.cpu1.kern.mode_ticks_kernel 13359666 0.36% 0.36% # number of ticks spent at the given mode
+system.cpu1.kern.mode_ticks_user 1967356 0.05% 0.41% # number of ticks spent at the given mode
+system.cpu1.kern.mode_ticks_idle 3719571407 99.59% 100.00% # number of ticks spent at the given mode
+system.cpu1.kern.swap_context 2103 # number of times the context was actually changed
+system.cpu1.kern.syscall 150 # number of syscalls executed
+system.cpu1.kern.syscall_fork 1 0.67% 0.67% # number of syscalls executed
+system.cpu1.kern.syscall_read 16 10.67% 11.33% # number of syscalls executed
+system.cpu1.kern.syscall_close 16 10.67% 22.00% # number of syscalls executed
+system.cpu1.kern.syscall_chmod 1 0.67% 22.67% # number of syscalls executed
+system.cpu1.kern.syscall_obreak 9 6.00% 28.67% # number of syscalls executed
+system.cpu1.kern.syscall_lseek 3 2.00% 30.67% # number of syscalls executed
+system.cpu1.kern.syscall_getpid 2 1.33% 32.00% # number of syscalls executed
+system.cpu1.kern.syscall_setuid 3 2.00% 34.00% # number of syscalls executed
+system.cpu1.kern.syscall_getuid 3 2.00% 36.00% # number of syscalls executed
+system.cpu1.kern.syscall_access 5 3.33% 39.33% # number of syscalls executed
+system.cpu1.kern.syscall_open 25 16.67% 56.00% # number of syscalls executed
+system.cpu1.kern.syscall_getgid 3 2.00% 58.00% # number of syscalls executed
+system.cpu1.kern.syscall_sigprocmask 2 1.33% 59.33% # number of syscalls executed
+system.cpu1.kern.syscall_ioctl 2 1.33% 60.67% # number of syscalls executed
+system.cpu1.kern.syscall_readlink 1 0.67% 61.33% # number of syscalls executed
+system.cpu1.kern.syscall_execve 2 1.33% 62.67% # number of syscalls executed
+system.cpu1.kern.syscall_mmap 37 24.67% 87.33% # number of syscalls executed
+system.cpu1.kern.syscall_mprotect 12 8.00% 95.33% # number of syscalls executed
+system.cpu1.kern.syscall_dup2 1 0.67% 96.00% # number of syscalls executed
+system.cpu1.kern.syscall_fcntl 2 1.33% 97.33% # number of syscalls executed
+system.cpu1.kern.syscall_setgid 3 2.00% 99.33% # number of syscalls executed
+system.cpu1.kern.syscall_getrlimit 1 0.67% 100.00% # number of syscalls executed
+system.cpu1.not_idle_fraction 0.005073 # Percentage of non-idle cycles
+system.cpu1.numCycles 3734898877 # number of cpu cycles simulated
+system.cpu1.num_insts 14364039 # Number of instructions executed
+system.cpu1.num_refs 4590544 # Number of memory references
system.disk0.dma_read_bytes 1024 # Number of bytes transfered via DMA reads (not PRD).
system.disk0.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD).
system.disk0.dma_read_txs 1 # Number of DMA read transactions (not PRD).
-system.disk0.dma_write_bytes 2521088 # Number of bytes transfered via DMA writes.
-system.disk0.dma_write_full_pages 285 # Number of full page size DMA writes.
-system.disk0.dma_write_txs 375 # Number of DMA write transactions.
+system.disk0.dma_write_bytes 2702336 # Number of bytes transfered via DMA writes.
+system.disk0.dma_write_full_pages 302 # Number of full page size DMA writes.
+system.disk0.dma_write_txs 408 # Number of DMA write transactions.
system.disk2.dma_read_bytes 0 # Number of bytes transfered via DMA reads (not PRD).
system.disk2.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD).
system.disk2.dma_read_txs 0 # Number of DMA read transactions (not PRD).
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr
index 2191bd088..64d80c0d2 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr
@@ -1,6 +1,6 @@
0: system.tsunami.io.rtc: Real-time clock set to Sun Jan 1 00:00:00 2006
-Listening for console connection on port 3456
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
-0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001
+Listening for console connection on port 3457
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7001
+0: system.remote_gdb.listener: listening for remote gdb #1 on port 7002
warn: Entering event queue @ 0. Starting simulation...
-warn: 271342: Trying to launch CPU number 1!
+warn: 271343: Trying to launch CPU number 1!
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout
index 33c194686..0e22ad636 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout
@@ -5,8 +5,8 @@ The Regents of The University of Michigan
All Rights Reserved
-M5 compiled Sep 5 2006 15:32:34
-M5 started Tue Sep 5 15:45:11 2006
-M5 executing on zizzer.eecs.umich.edu
+M5 compiled Oct 10 2006 01:59:16
+M5 started Tue Oct 10 02:09:13 2006
+M5 executing on zamp.eecs.umich.edu
command line: build/ALPHA_FS/m5.opt -d build/ALPHA_FS/tests/opt/quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
-Exiting @ tick 3539435029 because m5_exit instruction encountered
+Exiting @ tick 3734898877 because m5_exit instruction encountered
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini
index 2a354dee0..6514a6af7 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini
@@ -58,6 +58,7 @@ mem_mode=timing
pal=/dist/m5/system/binaries/ts_osfpal
physmem=system.physmem
readfile=tests/halt.sh
+symbolfile=
system_rev=1024
system_type=34
@@ -74,7 +75,7 @@ side_b=system.membus.port[0]
type=TimingSimpleCPU
children=dtb itb
clock=1
-cpu_id=-1
+cpu_id=0
defer_registration=false
dtb=system.cpu.dtb
function_trace=false
@@ -86,6 +87,7 @@ max_loads_all_threads=0
max_loads_any_thread=0
mem=system.physmem
profile=0
+progress_interval=0
system=system
dcache_port=system.membus.port[3]
icache_port=system.membus.port[2]
@@ -143,12 +145,16 @@ cpu=system.cpu
[system.iobus]
type=Bus
bus_id=0
+clock=2
+width=64
default=system.tsunami.pciconfig.pio
port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio system.tsunami.fake_sm_chip.pio system.tsunami.fake_uart1.pio system.tsunami.fake_uart2.pio system.tsunami.fake_uart3.pio system.tsunami.fake_uart4.pio system.tsunami.fake_ppc.pio system.tsunami.fake_OROM.pio system.tsunami.fake_pnp_addr.pio system.tsunami.fake_pnp_write.pio system.tsunami.fake_pnp_read0.pio system.tsunami.fake_pnp_read1.pio system.tsunami.fake_pnp_read2.pio system.tsunami.fake_pnp_read3.pio system.tsunami.fake_pnp_read4.pio system.tsunami.fake_pnp_read5.pio system.tsunami.fake_pnp_read6.pio system.tsunami.fake_pnp_read7.pio system.tsunami.fake_ata0.pio system.tsunami.fake_ata1.pio system.tsunami.fb.pio system.tsunami.io.pio system.tsunami.uart.pio system.tsunami.console.pio system.tsunami.ide.pio system.tsunami.ethernet.pio system.tsunami.ethernet.config system.tsunami.ethernet.dma system.tsunami.ide.config system.tsunami.ide.dma
[system.membus]
type=Bus
bus_id=1
+clock=2
+width=64
port=system.bridge.side_b system.physmem.port system.cpu.icache_port system.cpu.dcache_port
[system.physmem]
@@ -552,6 +558,7 @@ pio=system.iobus.port[24]
[trace]
bufsize=0
+cycle=0
dump_on_exit=false
file=cout
flags=
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out
index 1b99934c9..173819299 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out
@@ -21,6 +21,7 @@ console=/dist/m5/system/binaries/console
pal=/dist/m5/system/binaries/ts_osfpal
boot_osflags=root=/dev/hda1 console=ttyS0
readfile=tests/halt.sh
+symbolfile=
init_param=0
system_type=34
system_rev=1024
@@ -28,6 +29,8 @@ system_rev=1024
[system.membus]
type=Bus
bus_id=1
+clock=2
+width=64
[system.bridge]
type=Bridge
@@ -86,11 +89,12 @@ max_insts_any_thread=0
max_insts_all_threads=0
max_loads_any_thread=0
max_loads_all_threads=0
+progress_interval=0
mem=system.physmem
system=system
+cpu_id=0
itb=system.cpu.itb
dtb=system.cpu.dtb
-cpu_id=-1
profile=0
clock=1
defer_registration=false
@@ -461,10 +465,13 @@ disks=system.disk0 system.disk2
[system.iobus]
type=Bus
bus_id=0
+clock=2
+width=64
[trace]
flags=
start=0
+cycle=0
bufsize=0
file=cout
dump_on_exit=false
@@ -508,6 +515,9 @@ trace_system=client
[debug]
break_cycles=
+[statsreset]
+reset_cycle=0
+
[pseudo_inst]
quiesce=true
statistics=true
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/console.system.sim_console b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/console.system.sim_console
index ea7a20777..5461cc4ab 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/console.system.sim_console
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/console.system.sim_console
@@ -3,7 +3,7 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
memsize 8000000 pages 4000
First free page after ROM 0xFFFFFC0000018000
HWRPB 0xFFFFFC0000018000 l1pt 0xFFFFFC0000040000 l2pt 0xFFFFFC0000042000 l3pt_rpb 0xFFFFFC0000044000 l3pt_kernel 0xFFFFFC0000048000 l2reserv 0xFFFFFC0000046000
-kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC00008064E8, kentry = 0xFFFFFC0000310000, numCPUs = 0x1
+kstart = 0xFFFFFC0000310000, kend = 0xFFFFFC0000855898, kentry = 0xFFFFFC0000310000, numCPUs = 0x1
CPU Clock at 2000 MHz IntrClockFrequency=1024
Booting with 1 processor(s)
KSP: 0x20043FE8 PTBR 0x20
@@ -14,28 +14,26 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
ConsoleDispatch at virt 10000658 phys 18658 val FFFFFC00000100A8
unix_boot_mem ends at FFFFFC0000076000
k_argc = 0
-jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1028)
-CallbackFixup 0 18000, t7=FFFFFC0000700000
-Linux version 2.6.8.1 (binkertn@ziff.eecs.umich.edu) (gcc version 3.4.3) #36 SMP Mon May 2 19:50:53 EDT 2005
+jumping to kernel at 0xFFFFFC0000310000, (PCBB 0xFFFFFC0000018180 pfn 1067)
+CallbackFixup 0 18000, t7=FFFFFC000070C000
+Linux version 2.6.13 (hsul@zed.eecs.umich.edu) (gcc version 3.4.3) #1 SMP Sun Oct 8 19:52:07 EDT 2006
Booting GENERIC on Tsunami variation DP264 using machine vector DP264 from SRM
Major Options: SMP LEGACY_START VERBOSE_MCHECK
Command line: root=/dev/hda1 console=ttyS0
memcluster 0, usage 1, start 0, end 392
memcluster 1, usage 0, start 392, end 16384
-freeing pages 1030:16384
-reserving pages 1030:1031
+freeing pages 1069:16384
+reserving pages 1069:1070
SMP: 1 CPUs probed -- cpu_present_mask = 1
Built 1 zonelists
Kernel command line: root=/dev/hda1 console=ttyS0
-PID hash table entries: 1024 (order 10: 16384 bytes)
+PID hash table entries: 1024 (order: 10, 32768 bytes)
Using epoch = 1900
Console: colour dummy device 80x25
Dentry cache hash table entries: 32768 (order: 5, 262144 bytes)
Inode-cache hash table entries: 16384 (order: 4, 131072 bytes)
-Memory: 119072k/131072k available (3058k kernel code, 8680k reserved, 695k data, 480k init)
-Mount-cache hash table entries: 512 (order: 0, 8192 bytes)
-per-CPU timeslice cutoff: 374.49 usecs.
-task migration cache decay timeout: 0 msecs.
+Memory: 118784k/131072k available (3314k kernel code, 8952k reserved, 983k data, 224k init)
+Mount-cache hash table entries: 512
SMP mode deactivated.
Brought up 1 CPUs
SMP: Total of 1 processors activated (4002.20 BogoMIPS).
@@ -48,16 +46,21 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
Initializing Cryptographic API
rtc: Standard PC (1900) epoch (1900) detected
Real Time Clock Driver v1.12
-Serial: 8250/16550 driver $Revision: 1.90 $ 5 ports, IRQ sharing disabled
+Serial: 8250/16550 driver $Revision: 1.90 $ 1 ports, IRQ sharing disabled
ttyS0 at I/O 0x3f8 (irq = 4) is a 8250
+io scheduler noop registered
+io scheduler anticipatory registered
+io scheduler deadline registered
+io scheduler cfq registered
loop: loaded (max 8 devices)
-Using anticipatory io scheduler
nbd: registered device at major 43
-sinic.c: M5 Simple Integrated NIC driver
ns83820.c: National Semiconductor DP83820 10/100/1000 driver.
eth0: ns83820.c: 0x22c: 00000000, subsystem: 0000:0000
eth0: enabling optical transceiver
-eth0: ns83820 v0.20: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=sg
+eth0: using 64 bit addressing.
+eth0: ns83820 v0.22: DP83820 v1.3: 00:90:00:00:00:01 io=0x09000000 irq=30 f=h,sg
+tun: Universal TUN/TAP device driver, 1.6
+tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2
ide: Assuming 33MHz system bus speed for PIO modes; override with idebus=xx
PIIX4: IDE controller at PCI slot 0000:00:00.0
@@ -69,25 +72,24 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
hdb: M5 IDE Disk, ATA DISK drive
ide0 at 0x8410-0x8417,0x8422 on irq 31
hda: max request size: 128KiB
-hda: 163296 sectors (83 MB), CHS=162/16/63, UDMA(33)
+hda: 511056 sectors (261 MB), CHS=507/16/63, UDMA(33)
+hda: cache flushes not supported
hda: hda1
hdb: max request size: 128KiB
hdb: 4177920 sectors (2139 MB), CHS=4144/16/63, UDMA(33)
+hdb: cache flushes not supported
hdb: unknown partition table
-scsi0 : scsi_m5, version 1.73 [20040518], dev_size_mb=8, opts=0x0
-Vendor: Linux Model: scsi_m5 Li Rev: 0004
-Type: Direct-Access ANSI SCSI revision: 03
-SCSI device sda: 16384 512-byte hdwr sectors (8 MB)
-SCSI device sda: drive cache: write back
-sda: unknown partition table
-Attached scsi disk sda at scsi0, channel 0, id 0, lun 0
mice: PS/2 mouse device common for all mice
NET: Registered protocol family 2
-IP: routing cache hash table of 1024 buckets, 16Kbytes
-TCP: Hash tables configured (established 8192 bind 8192)
-ip_conntrack version 2.1 (512 buckets, 4096 max) - 440 bytes per conntrack
+IP route cache hash table entries: 4096 (order: 2, 32768 bytes)
+TCP established hash table entries: 16384 (order: 5, 262144 bytes)
+TCP bind hash table entries: 16384 (order: 5, 262144 bytes)
+TCP: Hash tables configured (established 16384 bind 16384)
+TCP reno registered
+ip_conntrack version 2.1 (512 buckets, 4096 max) - 296 bytes per conntrack
ip_tables: (C) 2000-2002 Netfilter core team
arp_tables: (C) 2002 David S. Miller
+TCP bic registered
Initializing IPsec netlink socket
NET: Registered protocol family 1
NET: Registered protocol family 17
@@ -96,11 +98,7 @@ M5 console: m5AlphaAccess @ 0xFFFFFD0200000000
802.1Q VLAN Support v1.8 Ben Greear <greearb@candelatech.com>
All bugs added by David S. Miller <davem@redhat.com>
VFS: Mounted root (ext2 filesystem) readonly.
-Freeing unused kernel memory: 480k freed
-init started: BusyBox v1.00-rc2 (2004.11.18-16:22+0000) multi-call binary
-
-PTXdist-0.7.0 (2004-11-18T11:23:40-0500)
-
+Freeing unused kernel memory: 224k freed
+init started: BusyBox v1.1.0 (2006.08.17-02:54+0000) multi-call binary
mounting filesystems...
-EXT2-fs warning: checktime reached, running e2fsck is recommended
-loading script...
+loading script...
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt
index 0adb4cc31..c126b03a3 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt
@@ -1,130 +1,127 @@
---------- Begin Simulation Statistics ----------
-host_inst_rate 835908 # Simulator instruction rate (inst/s)
-host_mem_usage 194192 # Number of bytes of host memory used
-host_seconds 71.68 # Real time elapsed on the host
-host_tick_rate 48916813 # Simulator tick rate (ticks/s)
+host_inst_rate 244619 # Simulator instruction rate (inst/s)
+host_mem_usage 197804 # Number of bytes of host memory used
+host_seconds 252.48 # Real time elapsed on the host
+host_tick_rate 14464234 # Simulator tick rate (ticks/s)
sim_freq 2000000000 # Frequency of simulated ticks
-sim_insts 59915182 # Number of instructions simulated
-sim_seconds 1.753109 # Number of seconds simulated
-sim_ticks 3506218170 # Number of ticks simulated
-system.cpu.dtb.accesses 2354955 # DTB accesses
-system.cpu.dtb.acv 413 # DTB access violations
-system.cpu.dtb.hits 13926686 # DTB hits
-system.cpu.dtb.misses 16187 # DTB misses
-system.cpu.dtb.read_accesses 832415 # DTB read accesses
-system.cpu.dtb.read_acv 242 # DTB read access violations
-system.cpu.dtb.read_hits 7716658 # DTB read hits
-system.cpu.dtb.read_misses 13695 # DTB read misses
-system.cpu.dtb.write_accesses 1522540 # DTB write accesses
-system.cpu.dtb.write_acv 171 # DTB write access violations
-system.cpu.dtb.write_hits 6210028 # DTB write hits
-system.cpu.dtb.write_misses 2492 # DTB write misses
-system.cpu.idle_fraction 0.978925 # Percentage of idle cycles
-system.cpu.itb.accesses 4037381 # ITB accesses
-system.cpu.itb.acv 239 # ITB acv
-system.cpu.itb.hits 4030657 # ITB hits
-system.cpu.itb.misses 6724 # ITB misses
-system.cpu.kern.callpal 183644 # number of callpals executed
+sim_insts 61760478 # Number of instructions simulated
+sim_seconds 1.825937 # Number of seconds simulated
+sim_ticks 3651873858 # Number of ticks simulated
+system.cpu.dtb.accesses 1304494 # DTB accesses
+system.cpu.dtb.acv 367 # DTB access violations
+system.cpu.dtb.hits 16545335 # DTB hits
+system.cpu.dtb.misses 11425 # DTB misses
+system.cpu.dtb.read_accesses 900425 # DTB read accesses
+system.cpu.dtb.read_acv 210 # DTB read access violations
+system.cpu.dtb.read_hits 10034117 # DTB read hits
+system.cpu.dtb.read_misses 10280 # DTB read misses
+system.cpu.dtb.write_accesses 404069 # DTB write accesses
+system.cpu.dtb.write_acv 157 # DTB write access violations
+system.cpu.dtb.write_hits 6511218 # DTB write hits
+system.cpu.dtb.write_misses 1145 # DTB write misses
+system.cpu.idle_fraction 0.978539 # Percentage of idle cycles
+system.cpu.itb.accesses 3281311 # ITB accesses
+system.cpu.itb.acv 184 # ITB acv
+system.cpu.itb.hits 3276321 # ITB hits
+system.cpu.itb.misses 4990 # ITB misses
+system.cpu.kern.callpal 193987 # number of callpals executed
system.cpu.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed
system.cpu.kern.callpal_wrmces 1 0.00% 0.00% # number of callpals executed
system.cpu.kern.callpal_wrfen 1 0.00% 0.00% # number of callpals executed
system.cpu.kern.callpal_wrvptptr 1 0.00% 0.00% # number of callpals executed
-system.cpu.kern.callpal_swpctx 1861 1.01% 1.02% # number of callpals executed
-system.cpu.kern.callpal_tbi 28 0.02% 1.03% # number of callpals executed
-system.cpu.kern.callpal_wrent 7 0.00% 1.03% # number of callpals executed
-system.cpu.kern.callpal_swpipl 171635 93.46% 94.50% # number of callpals executed
-system.cpu.kern.callpal_rdps 4808 2.62% 97.11% # number of callpals executed
-system.cpu.kern.callpal_wrkgp 1 0.00% 97.11% # number of callpals executed
-system.cpu.kern.callpal_wrusp 8 0.00% 97.12% # number of callpals executed
-system.cpu.kern.callpal_rdusp 12 0.01% 97.12% # number of callpals executed
-system.cpu.kern.callpal_whami 2 0.00% 97.13% # number of callpals executed
-system.cpu.kern.callpal_rti 4297 2.34% 99.47% # number of callpals executed
-system.cpu.kern.callpal_callsys 667 0.36% 99.83% # number of callpals executed
-system.cpu.kern.callpal_imb 314 0.17% 100.00% # number of callpals executed
+system.cpu.kern.callpal_swpctx 4203 2.17% 2.17% # number of callpals executed
+system.cpu.kern.callpal_tbi 54 0.03% 2.20% # number of callpals executed
+system.cpu.kern.callpal_wrent 7 0.00% 2.20% # number of callpals executed
+system.cpu.kern.callpal_swpipl 176881 91.18% 93.38% # number of callpals executed
+system.cpu.kern.callpal_rdps 6888 3.55% 96.93% # number of callpals executed
+system.cpu.kern.callpal_wrkgp 1 0.00% 96.93% # number of callpals executed
+system.cpu.kern.callpal_wrusp 7 0.00% 96.94% # number of callpals executed
+system.cpu.kern.callpal_rdusp 9 0.00% 96.94% # number of callpals executed
+system.cpu.kern.callpal_whami 2 0.00% 96.94% # number of callpals executed
+system.cpu.kern.callpal_rti 5219 2.69% 99.63% # number of callpals executed
+system.cpu.kern.callpal_callsys 531 0.27% 99.91% # number of callpals executed
+system.cpu.kern.callpal_imb 181 0.09% 100.00% # number of callpals executed
system.cpu.kern.inst.arm 0 # number of arm instructions executed
-system.cpu.kern.inst.hwrei 209285 # number of hwrei instructions executed
+system.cpu.kern.inst.hwrei 213061 # number of hwrei instructions executed
system.cpu.kern.inst.ivlb 0 # number of ivlb instructions executed
system.cpu.kern.inst.ivle 0 # number of ivle instructions executed
-system.cpu.kern.inst.quiesce 1867 # number of quiesce instructions executed
-system.cpu.kern.ipl_count 178009 # number of times we switched to this ipl
-system.cpu.kern.ipl_count_0 75254 42.28% 42.28% # number of times we switched to this ipl
-system.cpu.kern.ipl_count_21 286 0.16% 42.44% # number of times we switched to this ipl
-system.cpu.kern.ipl_count_22 5465 3.07% 45.51% # number of times we switched to this ipl
-system.cpu.kern.ipl_count_31 97004 54.49% 100.00% # number of times we switched to this ipl
-system.cpu.kern.ipl_good 159802 # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_0 75188 47.05% 47.05% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_21 286 0.18% 47.23% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_22 5465 3.42% 50.65% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_31 78863 49.35% 100.00% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_ticks 3506217640 # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_0 3478896122 99.22% 99.22% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_21 60705 0.00% 99.22% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_22 1274059 0.04% 99.26% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_31 25986754 0.74% 100.00% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_used 0.897719 # fraction of swpipl calls that actually changed the ipl
-system.cpu.kern.ipl_used_0 0.999123 # fraction of swpipl calls that actually changed the ipl
+system.cpu.kern.inst.quiesce 6207 # number of quiesce instructions executed
+system.cpu.kern.ipl_count 184207 # number of times we switched to this ipl
+system.cpu.kern.ipl_count_0 75390 40.93% 40.93% # number of times we switched to this ipl
+system.cpu.kern.ipl_count_21 245 0.13% 41.06% # number of times we switched to this ipl
+system.cpu.kern.ipl_count_22 1861 1.01% 42.07% # number of times we switched to this ipl
+system.cpu.kern.ipl_count_31 106711 57.93% 100.00% # number of times we switched to this ipl
+system.cpu.kern.ipl_good 150152 # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_good_0 74023 49.30% 49.30% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_good_21 245 0.16% 49.46% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_good_22 1861 1.24% 50.70% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_good_31 74023 49.30% 100.00% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_ticks 3651873412 # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_0 3611240657 98.89% 98.89% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_21 53683 0.00% 98.89% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_22 219598 0.01% 98.89% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_31 40359474 1.11% 100.00% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_used 0.815126 # fraction of swpipl calls that actually changed the ipl
+system.cpu.kern.ipl_used_0 0.981868 # fraction of swpipl calls that actually changed the ipl
system.cpu.kern.ipl_used_21 1 # fraction of swpipl calls that actually changed the ipl
system.cpu.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl
-system.cpu.kern.ipl_used_31 0.812987 # fraction of swpipl calls that actually changed the ipl
-system.cpu.kern.mode_good_kernel 2339
-system.cpu.kern.mode_good_user 2168
-system.cpu.kern.mode_good_idle 171
-system.cpu.kern.mode_switch_kernel 4093 # number of protection mode switches
-system.cpu.kern.mode_switch_user 2168 # number of protection mode switches
-system.cpu.kern.mode_switch_idle 2043 # number of protection mode switches
-system.cpu.kern.mode_switch_good 0.563343 # fraction of useful protection mode switches
-system.cpu.kern.mode_switch_good_kernel 0.571463 # fraction of useful protection mode switches
+system.cpu.kern.ipl_used_31 0.693677 # fraction of swpipl calls that actually changed the ipl
+system.cpu.kern.mode_good_kernel 1934
+system.cpu.kern.mode_good_user 1754
+system.cpu.kern.mode_good_idle 180
+system.cpu.kern.mode_switch_kernel 5984 # number of protection mode switches
+system.cpu.kern.mode_switch_user 1754 # number of protection mode switches
+system.cpu.kern.mode_switch_idle 2104 # number of protection mode switches
+system.cpu.kern.mode_switch_good 0.393010 # fraction of useful protection mode switches
+system.cpu.kern.mode_switch_good_kernel 0.323195 # fraction of useful protection mode switches
system.cpu.kern.mode_switch_good_user 1 # fraction of useful protection mode switches
-system.cpu.kern.mode_switch_good_idle 0.083700 # fraction of useful protection mode switches
-system.cpu.kern.mode_ticks_kernel 40644475 1.16% 1.16% # number of ticks spent at the given mode
-system.cpu.kern.mode_ticks_user 5527486 0.16% 1.32% # number of ticks spent at the given mode
-system.cpu.kern.mode_ticks_idle 3460045677 98.68% 100.00% # number of ticks spent at the given mode
-system.cpu.kern.swap_context 1862 # number of times the context was actually changed
-system.cpu.kern.syscall 475 # number of syscalls executed
-system.cpu.kern.syscall_fork 10 2.11% 2.11% # number of syscalls executed
-system.cpu.kern.syscall_read 33 6.95% 9.05% # number of syscalls executed
-system.cpu.kern.syscall_write 7 1.47% 10.53% # number of syscalls executed
-system.cpu.kern.syscall_close 49 10.32% 20.84% # number of syscalls executed
-system.cpu.kern.syscall_chdir 1 0.21% 21.05% # number of syscalls executed
-system.cpu.kern.syscall_chmod 1 0.21% 21.26% # number of syscalls executed
-system.cpu.kern.syscall_obreak 44 9.26% 30.53% # number of syscalls executed
-system.cpu.kern.syscall_lseek 13 2.74% 33.26% # number of syscalls executed
-system.cpu.kern.syscall_getpid 10 2.11% 35.37% # number of syscalls executed
-system.cpu.kern.syscall_setuid 4 0.84% 36.21% # number of syscalls executed
-system.cpu.kern.syscall_getuid 8 1.68% 37.89% # number of syscalls executed
-system.cpu.kern.syscall_access 4 0.84% 38.74% # number of syscalls executed
-system.cpu.kern.syscall_dup 4 0.84% 39.58% # number of syscalls executed
-system.cpu.kern.syscall_open 68 14.32% 53.89% # number of syscalls executed
-system.cpu.kern.syscall_getgid 8 1.68% 55.58% # number of syscalls executed
-system.cpu.kern.syscall_sigprocmask 14 2.95% 58.53% # number of syscalls executed
-system.cpu.kern.syscall_ioctl 16 3.37% 61.89% # number of syscalls executed
-system.cpu.kern.syscall_readlink 2 0.42% 62.32% # number of syscalls executed
-system.cpu.kern.syscall_execve 8 1.68% 64.00% # number of syscalls executed
-system.cpu.kern.syscall_pre_F64_stat 31 6.53% 70.53% # number of syscalls executed
-system.cpu.kern.syscall_pre_F64_lstat 1 0.21% 70.74% # number of syscalls executed
-system.cpu.kern.syscall_mmap 55 11.58% 82.32% # number of syscalls executed
-system.cpu.kern.syscall_munmap 6 1.26% 83.58% # number of syscalls executed
-system.cpu.kern.syscall_mprotect 14 2.95% 86.53% # number of syscalls executed
-system.cpu.kern.syscall_gethostname 2 0.42% 86.95% # number of syscalls executed
-system.cpu.kern.syscall_dup2 4 0.84% 87.79% # number of syscalls executed
-system.cpu.kern.syscall_pre_F64_fstat 28 5.89% 93.68% # number of syscalls executed
-system.cpu.kern.syscall_fcntl 14 2.95% 96.63% # number of syscalls executed
-system.cpu.kern.syscall_socket 3 0.63% 97.26% # number of syscalls executed
-system.cpu.kern.syscall_connect 3 0.63% 97.89% # number of syscalls executed
-system.cpu.kern.syscall_setgid 4 0.84% 98.74% # number of syscalls executed
-system.cpu.kern.syscall_getrlimit 3 0.63% 99.37% # number of syscalls executed
-system.cpu.kern.syscall_setsid 3 0.63% 100.00% # number of syscalls executed
-system.cpu.not_idle_fraction 0.021075 # Percentage of non-idle cycles
-system.cpu.numCycles 0 # number of cpu cycles simulated
-system.cpu.num_insts 59915182 # Number of instructions executed
-system.cpu.num_refs 13979549 # Number of memory references
+system.cpu.kern.mode_switch_good_idle 0.085551 # fraction of useful protection mode switches
+system.cpu.kern.mode_ticks_kernel 58926919 1.61% 1.61% # number of ticks spent at the given mode
+system.cpu.kern.mode_ticks_user 4685602 0.13% 1.74% # number of ticks spent at the given mode
+system.cpu.kern.mode_ticks_idle 3588260889 98.26% 100.00% # number of ticks spent at the given mode
+system.cpu.kern.swap_context 4204 # number of times the context was actually changed
+system.cpu.kern.syscall 329 # number of syscalls executed
+system.cpu.kern.syscall_fork 8 2.43% 2.43% # number of syscalls executed
+system.cpu.kern.syscall_read 30 9.12% 11.55% # number of syscalls executed
+system.cpu.kern.syscall_write 4 1.22% 12.77% # number of syscalls executed
+system.cpu.kern.syscall_close 43 13.07% 25.84% # number of syscalls executed
+system.cpu.kern.syscall_chdir 1 0.30% 26.14% # number of syscalls executed
+system.cpu.kern.syscall_chmod 1 0.30% 26.44% # number of syscalls executed
+system.cpu.kern.syscall_obreak 15 4.56% 31.00% # number of syscalls executed
+system.cpu.kern.syscall_lseek 10 3.04% 34.04% # number of syscalls executed
+system.cpu.kern.syscall_getpid 6 1.82% 35.87% # number of syscalls executed
+system.cpu.kern.syscall_setuid 4 1.22% 37.08% # number of syscalls executed
+system.cpu.kern.syscall_getuid 6 1.82% 38.91% # number of syscalls executed
+system.cpu.kern.syscall_access 11 3.34% 42.25% # number of syscalls executed
+system.cpu.kern.syscall_dup 2 0.61% 42.86% # number of syscalls executed
+system.cpu.kern.syscall_open 55 16.72% 59.57% # number of syscalls executed
+system.cpu.kern.syscall_getgid 6 1.82% 61.40% # number of syscalls executed
+system.cpu.kern.syscall_sigprocmask 10 3.04% 64.44% # number of syscalls executed
+system.cpu.kern.syscall_ioctl 10 3.04% 67.48% # number of syscalls executed
+system.cpu.kern.syscall_readlink 1 0.30% 67.78% # number of syscalls executed
+system.cpu.kern.syscall_execve 7 2.13% 69.91% # number of syscalls executed
+system.cpu.kern.syscall_mmap 54 16.41% 86.32% # number of syscalls executed
+system.cpu.kern.syscall_munmap 3 0.91% 87.23% # number of syscalls executed
+system.cpu.kern.syscall_mprotect 16 4.86% 92.10% # number of syscalls executed
+system.cpu.kern.syscall_gethostname 1 0.30% 92.40% # number of syscalls executed
+system.cpu.kern.syscall_dup2 3 0.91% 93.31% # number of syscalls executed
+system.cpu.kern.syscall_fcntl 10 3.04% 96.35% # number of syscalls executed
+system.cpu.kern.syscall_socket 2 0.61% 96.96% # number of syscalls executed
+system.cpu.kern.syscall_connect 2 0.61% 97.57% # number of syscalls executed
+system.cpu.kern.syscall_setgid 4 1.22% 98.78% # number of syscalls executed
+system.cpu.kern.syscall_getrlimit 2 0.61% 99.39% # number of syscalls executed
+system.cpu.kern.syscall_setsid 2 0.61% 100.00% # number of syscalls executed
+system.cpu.not_idle_fraction 0.021461 # Percentage of non-idle cycles
+system.cpu.numCycles 3651873858 # number of cpu cycles simulated
+system.cpu.num_insts 61760478 # Number of instructions executed
+system.cpu.num_refs 16793874 # Number of memory references
system.disk0.dma_read_bytes 1024 # Number of bytes transfered via DMA reads (not PRD).
system.disk0.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD).
system.disk0.dma_read_txs 1 # Number of DMA read transactions (not PRD).
-system.disk0.dma_write_bytes 2521088 # Number of bytes transfered via DMA writes.
-system.disk0.dma_write_full_pages 285 # Number of full page size DMA writes.
-system.disk0.dma_write_txs 375 # Number of DMA write transactions.
+system.disk0.dma_write_bytes 2702336 # Number of bytes transfered via DMA writes.
+system.disk0.dma_write_full_pages 302 # Number of full page size DMA writes.
+system.disk0.dma_write_txs 408 # Number of DMA write transactions.
system.disk2.dma_read_bytes 0 # Number of bytes transfered via DMA reads (not PRD).
system.disk2.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD).
system.disk2.dma_read_txs 0 # Number of DMA read transactions (not PRD).
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr
index 6204251a5..4741dd710 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr
@@ -1,4 +1,4 @@
0: system.tsunami.io.rtc: Real-time clock set to Sun Jan 1 00:00:00 2006
-Listening for console connection on port 3456
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+Listening for console connection on port 3457
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7001
warn: Entering event queue @ 0. Starting simulation...
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout
index 2739943d2..2ffd4c8b9 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout
@@ -5,8 +5,8 @@ The Regents of The University of Michigan
All Rights Reserved
-M5 compiled Sep 5 2006 15:32:34
-M5 started Tue Sep 5 15:43:59 2006
-M5 executing on zizzer.eecs.umich.edu
+M5 compiled Oct 10 2006 01:59:16
+M5 started Tue Oct 10 02:04:59 2006
+M5 executing on zamp.eecs.umich.edu
command line: build/ALPHA_FS/m5.opt -d build/ALPHA_FS/tests/opt/quick/10.linux-boot/alpha/linux/tsunami-simple-timing tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing
-Exiting @ tick 3506218170 because m5_exit instruction encountered
+Exiting @ tick 3651873858 because m5_exit instruction encountered
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.ini b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.ini
index a4b103732..95cccfbf2 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.ini
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
type=AtomicSimpleCPU
children=workload
clock=1
+cpu_id=0
defer_registration=false
function_trace=false
function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=500000
max_loads_all_threads=0
max_loads_any_thread=0
mem=system.physmem
+progress_interval=0
simulate_stalls=false
system=system
width=1
@@ -92,6 +94,7 @@ port=system.membus.port[0]
[trace]
bufsize=0
+cycle=0
dump_on_exit=false
file=cout
flags=
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out
index 8f236d9cc..1138f2dbe 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out
@@ -33,8 +33,10 @@ max_insts_any_thread=500000
max_insts_all_threads=0
max_loads_any_thread=0
max_loads_all_threads=0
+progress_interval=0
mem=system.physmem
system=system
+cpu_id=0
workload=system.cpu.workload
clock=1
defer_registration=false
@@ -46,6 +48,7 @@ simulate_stalls=false
[trace]
flags=
start=0
+cycle=0
bufsize=0
file=cout
dump_on_exit=false
@@ -89,3 +92,6 @@ trace_system=client
[debug]
break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/m5stats.txt b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/m5stats.txt
index 0132ecf1b..bbc6e55b5 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/m5stats.txt
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
---------- Begin Simulation Statistics ----------
-host_inst_rate 1397534 # Simulator instruction rate (inst/s)
-host_mem_usage 147632 # Number of bytes of host memory used
-host_seconds 0.36 # Real time elapsed on the host
-host_tick_rate 1395943 # Simulator tick rate (ticks/s)
+host_inst_rate 1432213 # Simulator instruction rate (inst/s)
+host_mem_usage 147652 # Number of bytes of host memory used
+host_seconds 0.35 # Real time elapsed on the host
+host_tick_rate 1430432 # Simulator tick rate (ticks/s)
sim_freq 1000000000000 # Frequency of simulated ticks
sim_insts 500000 # Number of instructions simulated
sim_seconds 0.000000 # Number of seconds simulated
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/stdout b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/stdout
index d3edcdc0a..de2559c1c 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/stdout
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/stdout
@@ -7,8 +7,8 @@ The Regents of The University of Michigan
All Rights Reserved
-M5 compiled Sep 5 2006 15:28:48
-M5 started Tue Sep 5 15:42:20 2006
+M5 compiled Oct 8 2006 14:00:39
+M5 started Sun Oct 8 14:00:58 2006
M5 executing on zizzer.eecs.umich.edu
command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/20.eio-short/alpha/eio/simple-atomic tests/run.py quick/20.eio-short/alpha/eio/simple-atomic
Exiting @ tick 499999 because a thread reached the max instruction count
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.ini b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.ini
index 27568ad50..72ea32994 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.ini
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.ini
@@ -56,6 +56,7 @@ physmem=system.physmem
type=TimingSimpleCPU
children=dcache icache l2cache toL2Bus workload
clock=1
+cpu_id=0
defer_registration=false
function_trace=false
function_trace_start=0
@@ -64,6 +65,7 @@ max_insts_any_thread=500000
max_loads_all_threads=0
max_loads_any_thread=0
mem=system.cpu.dcache
+progress_interval=0
system=system
workload=system.cpu.workload
dcache_port=system.cpu.dcache.cpu_side
@@ -215,6 +217,7 @@ port=system.membus.port[0]
[trace]
bufsize=0
+cycle=0
dump_on_exit=false
file=cout
flags=
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out
index ba6875a7b..14eb07351 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out
@@ -72,8 +72,10 @@ max_insts_any_thread=500000
max_insts_all_threads=0
max_loads_any_thread=0
max_loads_all_threads=0
+progress_interval=0
mem=system.cpu.dcache
system=system
+cpu_id=0
workload=system.cpu.workload
clock=1
defer_registration=false
@@ -167,6 +169,7 @@ hit_latency=1
[trace]
flags=
start=0
+cycle=0
bufsize=0
file=cout
dump_on_exit=false
@@ -210,3 +213,6 @@ trace_system=client
[debug]
break_cycles=
+[statsreset]
+reset_cycle=0
+
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/m5stats.txt b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/m5stats.txt
index 6339e48b7..ebc70e1f0 100644
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/m5stats.txt
+++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/m5stats.txt
@@ -1,67 +1,67 @@
---------- Begin Simulation Statistics ----------
-host_inst_rate 620120 # Simulator instruction rate (inst/s)
-host_mem_usage 159196 # Number of bytes of host memory used
+host_inst_rate 620088 # Simulator instruction rate (inst/s)
+host_mem_usage 159272 # Number of bytes of host memory used
host_seconds 0.81 # Real time elapsed on the host
-host_tick_rate 845850 # Simulator tick rate (ticks/s)
+host_tick_rate 845969 # Simulator tick rate (ticks/s)
sim_freq 1000000000000 # Frequency of simulated ticks
sim_insts 500000 # Number of instructions simulated
sim_seconds 0.000001 # Number of seconds simulated
-sim_ticks 682354 # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses 124564 # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 2.987952 # average ReadReq miss latency
+sim_ticks 682488 # Number of ticks simulated
+system.cpu.dcache.ReadReq_accesses 124435 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 3 # average ReadReq miss latency
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 124315 # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency 744 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.001999 # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses 249 # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency 496 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.001991 # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses 248 # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses 56744 # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 1.256024 # average WriteReq miss latency
+system.cpu.dcache.ReadReq_hits 124120 # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency 945 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate 0.002531 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses 315 # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency 630 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.002531 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses 315 # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses 56340 # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 3 # average WriteReq miss latency
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2 # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits 56412 # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits 56201 # number of WriteReq hits
system.cpu.dcache.WriteReq_miss_latency 417 # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate 0.005851 # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses 332 # number of WriteReq misses
+system.cpu.dcache.WriteReq_miss_rate 0.002467 # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses 139 # number of WriteReq misses
system.cpu.dcache.WriteReq_mshr_miss_latency 278 # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate 0.002450 # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate 0.002467 # mshr miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_mshr_misses 139 # number of WriteReq MSHR misses
system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 311.061962 # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs 397.182819 # Average number of references to valid blocks.
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 181308 # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 1.998279 # average overall miss latency
+system.cpu.dcache.demand_accesses 180775 # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 3 # average overall miss latency
system.cpu.dcache.demand_avg_mshr_miss_latency 2 # average overall mshr miss latency
-system.cpu.dcache.demand_hits 180727 # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency 1161 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.003204 # miss rate for demand accesses
-system.cpu.dcache.demand_misses 581 # number of demand (read+write) misses
+system.cpu.dcache.demand_hits 180321 # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency 1362 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate 0.002511 # miss rate for demand accesses
+system.cpu.dcache.demand_misses 454 # number of demand (read+write) misses
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 774 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.002134 # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses 387 # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency 908 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate 0.002511 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses 454 # number of demand (read+write) MSHR misses
system.cpu.dcache.fast_writes 0 # number of fast writes performed
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 181308 # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 1.998279 # average overall miss latency
+system.cpu.dcache.overall_accesses 180775 # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 3 # average overall miss latency
system.cpu.dcache.overall_avg_mshr_miss_latency 2 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 180727 # number of overall hits
-system.cpu.dcache.overall_miss_latency 1161 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.003204 # miss rate for overall accesses
-system.cpu.dcache.overall_misses 581 # number of overall misses
+system.cpu.dcache.overall_hits 180321 # number of overall hits
+system.cpu.dcache.overall_miss_latency 1362 # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate 0.002511 # miss rate for overall accesses
+system.cpu.dcache.overall_misses 454 # number of overall misses
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 774 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.002134 # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses 387 # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency 908 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate 0.002511 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses 454 # number of overall MSHR misses
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
@@ -74,10 +74,10 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0
system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
system.cpu.dcache.replacements 0 # number of replacements
-system.cpu.dcache.sampled_refs 581 # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs 454 # Sample count of references to valid blocks.
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse 347.118131 # Cycle average of tags in use
-system.cpu.dcache.total_refs 180727 # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse 291.968600 # Cycle average of tags in use
+system.cpu.dcache.total_refs 180321 # Total number of references to valid blocks.
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.dcache.writebacks 0 # number of writebacks
system.cpu.icache.ReadReq_accesses 500000 # number of ReadReq accesses(hits+misses)
@@ -138,20 +138,20 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.icache.replacements 0 # number of replacements
system.cpu.icache.sampled_refs 403 # Sample count of references to valid blocks.
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse 268.434590 # Cycle average of tags in use
+system.cpu.icache.tagsinuse 268.423238 # Cycle average of tags in use
system.cpu.icache.total_refs 499597 # Total number of references to valid blocks.
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.icache.writebacks 0 # number of writebacks
system.cpu.idle_fraction 0 # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses 984 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 1.605691 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_accesses 857 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 2 # average ReadReq miss latency
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1 # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency 1580 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency 1714 # number of ReadReq miss cycles
system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses 984 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency 790 # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate 0.802846 # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses 790 # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_misses 857 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 857 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses 857 # number of ReadReq MSHR misses
system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks.
@@ -160,32 +160,32 @@ system.cpu.l2cache.blocked_no_targets 0 # nu
system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
-system.cpu.l2cache.demand_accesses 984 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 1.605691 # average overall miss latency
+system.cpu.l2cache.demand_accesses 857 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 2 # average overall miss latency
system.cpu.l2cache.demand_avg_mshr_miss_latency 1 # average overall mshr miss latency
system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 1580 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency 1714 # number of demand (read+write) miss cycles
system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
-system.cpu.l2cache.demand_misses 984 # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses 857 # number of demand (read+write) misses
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency 790 # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate 0.802846 # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses 790 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency 857 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses 857 # number of demand (read+write) MSHR misses
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses 984 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 1.605691 # average overall miss latency
+system.cpu.l2cache.overall_accesses 857 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 2 # average overall miss latency
system.cpu.l2cache.overall_avg_mshr_miss_latency 1 # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.l2cache.overall_hits 0 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 1580 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency 1714 # number of overall miss cycles
system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
-system.cpu.l2cache.overall_misses 984 # number of overall misses
+system.cpu.l2cache.overall_misses 857 # number of overall misses
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency 790 # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate 0.802846 # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses 790 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency 857 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses 857 # number of overall MSHR misses
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache
@@ -198,9 +198,9 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0
system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page
system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time
system.cpu.l2cache.replacements 0 # number of replacements
-system.cpu.l2cache.sampled_refs 984 # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs 857 # Sample count of references to valid blocks.
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse 615.553879 # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse 560.393094 # Cycle average of tags in use
system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks.
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.l2cache.writebacks 0 # number of writebacks diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/stdout b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/stdout index 158dcfe2b..076cf0a5a 100644 --- a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/stdout +++ b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/stdout @@ -7,8 +7,8 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 5 2006 15:28:48 -M5 started Tue Sep 5 15:42:20 2006 +M5 compiled Oct 8 2006 20:54:51 +M5 started Sun Oct 8 20:55:29 2006 M5 executing on zizzer.eecs.umich.edu command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/20.eio-short/alpha/eio/simple-timing tests/run.py quick/20.eio-short/alpha/eio/simple-timing -Exiting @ tick 682354 because a thread reached the max instruction count +Exiting @ tick 682488 because a thread reached the max instruction count diff --git a/tests/quick/50.memtest/test.py b/tests/quick/50.memtest/test.py new file mode 100644 index 000000000..e894b8fb8 --- /dev/null +++ b/tests/quick/50.memtest/test.py @@ -0,0 +1,28 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + |