diff options
83 files changed, 3672 insertions, 925 deletions
diff --git a/SConstruct b/SConstruct index 50089700a..dac4d137c 100644 --- a/SConstruct +++ b/SConstruct @@ -347,7 +347,10 @@ sticky_opts.AddOptions( ('CC', 'C compiler', os.environ.get('CC', env['CC'])), ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])), BoolOption('BATCH', 'Use batch pool for build and tests', False), - ('BATCH_CMD', 'Batch pool submission command name', 'qdo') + ('BATCH_CMD', 'Batch pool submission command name', 'qdo'), + ('PYTHONHOME', + 'Override the default PYTHONHOME for this system (use with caution)', + '%s:%s' % (sys.prefix, sys.exec_prefix)) ) # Non-sticky options only apply to the current build. @@ -359,7 +362,7 @@ nonsticky_opts.AddOptions( # These options get exported to #defines in config/*.hh (see src/SConscript). env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \ 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \ - 'USE_CHECKER'] + 'USE_CHECKER', 'PYTHONHOME'] # Define a handy 'no-op' action def no_action(target, source, env): @@ -399,8 +402,13 @@ def config_emitter(target, source, env): option = str(target[0]) # True target is config header file target = os.path.join('config', option.lower() + '.hh') - # Force value to 0/1 even if it's a Python bool - val = int(eval(str(env[option]))) + val = env[option] + if isinstance(val, bool): + # Force value to 0/1 + val = int(val) + elif isinstance(val, str): + val = '"' + val + '"' + # Sources are option name & value (packaged in SCons Value nodes) return ([target], [Value(option), Value(val)]) diff --git a/configs/boot/mutex-test.rcS b/configs/boot/mutex-test.rcS new file mode 100644 index 000000000..acf875368 --- /dev/null +++ b/configs/boot/mutex-test.rcS @@ -0,0 +1,6 @@ +#!/bin/sh + +cd /benchmarks/tests +/sbin/m5 resetstats +./pthread_mutex_test 4 10000 +/sbin/m5 exit diff --git a/configs/common/Benchmarks.py b/configs/common/Benchmarks.py index 1f272517a..eda0e80f9 100644 --- a/configs/common/Benchmarks.py +++ b/configs/common/Benchmarks.py @@ -97,6 +97,8 @@ 
Benchmarks = { 'ValStreamScale': [SysConfig('micro_streamscale.rcS', '512MB')], 'ValStreamCopy': [SysConfig('micro_streamcopy.rcS', '512MB')], + 'MutexTest': [SysConfig('mutex-test.rcS', '128MB')], + 'bnAn': [SysConfig('/z/saidi/work/m5.newmem.head/configs/boot/bn-app.rcS', '128MB', '/z/saidi/work/bottleneck/bnimg.img')] } diff --git a/configs/example/fs.py b/configs/example/fs.py index a5b8772af..a9daf63be 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -40,26 +40,49 @@ if not m5.build_env['FULL_SYSTEM']: parser = optparse.OptionParser() -parser.add_option("-d", "--detailed", action="store_true") -parser.add_option("-t", "--timing", action="store_true") -parser.add_option("-n", "--num_cpus", type="int", default=1) -parser.add_option("--caches", action="store_true") -parser.add_option("-m", "--maxtick", type="int") -parser.add_option("--maxtime", type="float") +# Benchmark options parser.add_option("--dual", action="store_true", help="Simulate two systems attached with an ethernet link") parser.add_option("-b", "--benchmark", action="store", type="string", dest="benchmark", help="Specify the benchmark to run. Available benchmarks: %s"\ % DefinedBenchmarks) + +# system options +parser.add_option("-d", "--detailed", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-n", "--num_cpus", type="int", default=1) +parser.add_option("--caches", action="store_true") + +# Run duration options +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("--maxtime", type="float") + +# Metafile options parser.add_option("--etherdump", action="store", type="string", dest="etherdump", help="Specify the filename to dump a pcap capture of the" \ "ethernet traffic") + +# Checkpointing options +###Note that performing checkpointing via python script files will override +###checkpoint instructions built into binaries. 
+parser.add_option("--take_checkpoints", action="store", type="string", + help="<M,N> will take checkpoint at cycle M and every N cycles \ + thereafter") +parser.add_option("--max_checkpoints", action="store", type="int", + help="the maximum number of checkpoints to drop", + default=5) parser.add_option("--checkpoint_dir", action="store", type="string", help="Place all checkpoints in this absolute directory") -parser.add_option("-c", "--checkpoint", action="store", type="int", +parser.add_option("-r", "--checkpoint_restore", action="store", type="int", help="restore from checkpoint <N>") +# CPU Switching - default switch model goes from a checkpoint +# to a timing simple CPU with caches to warm up, then to detailed CPU for +# data measurement +parser.add_option("-s", "--standard_switch", action="store_true", + help="switch from one cpu mode to another") + (options, args) = parser.parse_args() if args: @@ -74,23 +97,24 @@ class MyCache(BaseCache): tgts_per_mshr = 5 protocol = CoherenceProtocol(protocol='moesi') -# client system CPU is always simple... note this is an assignment of +# driver system CPU is always simple... note this is an assignment of # a class, not an instance. 
-ClientCPUClass = AtomicSimpleCPU -client_mem_mode = 'atomic' +DriveCPUClass = AtomicSimpleCPU +drive_mem_mode = 'atomic' +# system under test can be any of these CPUs if options.detailed: - ServerCPUClass = DerivO3CPU - server_mem_mode = 'timing' + TestCPUClass = DerivO3CPU + test_mem_mode = 'timing' elif options.timing: - ServerCPUClass = TimingSimpleCPU - server_mem_mode = 'timing' + TestCPUClass = TimingSimpleCPU + test_mem_mode = 'timing' else: - ServerCPUClass = AtomicSimpleCPU - server_mem_mode = 'atomic' + TestCPUClass = AtomicSimpleCPU + test_mem_mode = 'atomic' -ServerCPUClass.clock = '2GHz' -ClientCPUClass.clock = '2GHz' +TestCPUClass.clock = '2GHz' +DriveCPUClass.clock = '2GHz' if options.benchmark: try: @@ -105,38 +129,59 @@ else: else: bm = [SysConfig()] -server_sys = makeLinuxAlphaSystem(server_mem_mode, bm[0]) +test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0]) np = options.num_cpus -server_sys.cpu = [ServerCPUClass(cpu_id=i) for i in xrange(np)] +test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)] for i in xrange(np): - if options.caches: - server_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'), + if options.caches and not options.standard_switch: + test_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'), MyCache(size = '64kB')) - server_sys.cpu[i].connectMemPorts(server_sys.membus) - server_sys.cpu[i].mem = server_sys.physmem + test_sys.cpu[i].connectMemPorts(test_sys.membus) + test_sys.cpu[i].mem = test_sys.physmem if len(bm) == 2: - client_sys = makeLinuxAlphaSystem(client_mem_mode, bm[1]) - client_sys.cpu = ClientCPUClass(cpu_id=0) - client_sys.cpu.connectMemPorts(client_sys.membus) - client_sys.cpu.mem = client_sys.physmem - root = makeDualRoot(server_sys, client_sys, options.etherdump) + drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1]) + drive_sys.cpu = DriveCPUClass(cpu_id=0) + drive_sys.cpu.connectMemPorts(drive_sys.membus) + drive_sys.cpu.mem = drive_sys.physmem + root = makeDualRoot(test_sys, drive_sys, 
options.etherdump) elif len(bm) == 1: - root = Root(clock = '1THz', system = server_sys) + root = Root(clock = '1THz', system = test_sys) else: print "Error I don't know how to create more than 2 systems." sys.exit(1) +if options.standard_switch: + switch_cpus = [TimingSimpleCPU(defer_registration=True, cpu_id=(np+i)) for i in xrange(np)] + switch_cpus1 = [DerivO3CPU(defer_registration=True, cpu_id=(2*np+i)) for i in xrange(np)] + for i in xrange(np): + switch_cpus[i].system = test_sys + switch_cpus1[i].system = test_sys + switch_cpus[i].clock = TestCPUClass.clock + switch_cpus1[i].clock = TestCPUClass.clock + if options.caches: + switch_cpus[i].addPrivateSplitL1Caches(MyCache(size = '32kB'), + MyCache(size = '64kB')) + + switch_cpus[i].mem = test_sys.physmem + switch_cpus1[i].mem = test_sys.physmem + switch_cpus[i].connectMemPorts(test_sys.membus) + root.switch_cpus = switch_cpus + root.switch_cpus1 = switch_cpus1 + switch_cpu_list = [(test_sys.cpu[i], switch_cpus[i]) for i in xrange(np)] + switch_cpu_list1 = [(switch_cpus[i], switch_cpus1[i]) for i in xrange(np)] + m5.instantiate(root) -if options.checkpoint: +if options.checkpoint_dir: + cptdir = options.checkpoint_dir +else: + cptdir = getcwd() + +if options.checkpoint_restore: from os.path import isdir from os import listdir, getcwd import re - if options.checkpoint_dir: - cptdir = options.checkpoint_dir - else: - cptdir = getcwd() if not isdir(cptdir): m5.panic("checkpoint dir %s does not exist!" 
% cptdir) @@ -149,10 +194,26 @@ if options.checkpoint: if match: cpts.append(match.group(1)) - if options.checkpoint > len(cpts): - m5.panic('Checkpoint %d not found' % options.checkpoint) + cpts.sort(lambda a,b: cmp(long(a), long(b))) - m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint - 1]])) + if options.checkpoint_restore > len(cpts): + m5.panic('Checkpoint %d not found' % options.checkpoint_restore) + + m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]])) + +if options.standard_switch: + exit_event = m5.simulate(1000) + ## when you change to Timing (or Atomic), you halt the system given + ## as argument. When you are finished with the system changes + ## (including switchCpus), you must resume the system manually. + ## You DON'T need to resume after just switching CPUs if you haven't + ## changed anything on the system level. + m5.changeToTiming(test_sys) + m5.switchCpus(switch_cpu_list) + m5.resume(test_sys) + + exit_event = m5.simulate(500000000000) + m5.switchCpus(switch_cpu_list1) if options.maxtick: maxtick = options.maxtick @@ -163,17 +224,56 @@ elif options.maxtime: else: maxtick = -1 -exit_event = m5.simulate(maxtick) +num_checkpoints = 0 -while exit_event.getCause() == "checkpoint": - if options.checkpoint_dir: - m5.checkpoint(root, "/".join([options.checkpoint_dir, "cpt.%d"])) - else: - m5.checkpoint(root, "cpt.%d") +exit_cause = '' - if maxtick == -1: - exit_event = m5.simulate(maxtick) - else: - exit_event = m5.simulate(maxtick - m5.curTick()) +if options.take_checkpoints: + [when, period] = options.take_checkpoints.split(",", 1) + when = int(when) + period = int(period) + + exit_event = m5.simulate(when) + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(when - m5.curTick()) + + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, "/".join([cptdir, "cpt.%d"])) + num_checkpoints += 1 + + sim_ticks = when + exit_cause = "maximum %d 
checkpoints dropped" % options.max_checkpoints + while num_checkpoints < options.max_checkpoints: + if (sim_ticks + period) > maxtick and maxtick != -1: + exit_event = m5.simulate(maxtick - sim_ticks) + exit_cause = exit_event.getCause() + break + else: + exit_event = m5.simulate(period) + sim_ticks += period + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(sim_ticks - m5.curTick()) + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, "/".join([cptdir, "cpt.%d"])) + num_checkpoints += 1 + +else: #no checkpoints being taken via this script + exit_event = m5.simulate(maxtick) + + while exit_event.getCause() == "checkpoint": + m5.checkpoint(root, "/".join([cptdir, "cpt.%d"])) + num_checkpoints += 1 + if num_checkpoints == options.max_checkpoints: + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + break + + if maxtick == -1: + exit_event = m5.simulate(maxtick) + else: + exit_event = m5.simulate(maxtick - m5.curTick()) + + exit_cause = exit_event.getCause() -print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() +if exit_cause == '': + exit_cause = exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because ', exit_cause diff --git a/configs/example/memtest.py b/configs/example/memtest.py new file mode 100644 index 000000000..141ecfd8e --- /dev/null +++ b/configs/example/memtest.py @@ -0,0 +1,138 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Ron Dreslinski + +import m5 +from m5.objects import * +import os, optparse, sys +m5.AddToPath('../common') + +parser = optparse.OptionParser() + +parser.add_option("--caches", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("-l", "--maxloads", default = "1000000000000", type="int") +parser.add_option("-n", "--numtesters", default = "8", type="int") +parser.add_option("-p", "--protocol", + default="moesi", + help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +# -------------------- +# Base L1 Cache +# ==================== + +class L1(BaseCache): + latency = 1 + block_size = 64 + mshrs = 12 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol=options.protocol) + +# ---------------------- +# Base L2 Cache +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = 10 + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +#MAX CORES IS 8 with the false sharing method +if options.numtesters > 8: + print "Error: NUmber of testers limited to 8 because of false sharing" + sys.exit(1) + +if options.timing: + cpus = [ MemTest(atomic=False, max_loads=options.maxloads, percent_functional=50, + percent_uncacheable=10, progress_interval=1000) + for i in xrange(options.numtesters) ] +else: + cpus = [ MemTest(atomic=True, max_loads=options.maxloads, percent_functional=50, + percent_uncacheable=10, progress_interval=1000) + for i in xrange(options.numtesters) ] +# system simulated +system = System(cpu = cpus, funcmem = PhysicalMemory(), + physmem = PhysicalMemory(latency = "50ps"), membus = Bus(clock="500GHz", width=16)) + +# l2cache & bus +if options.caches: + system.toL2Bus = Bus(clock="500GHz", width=16) + system.l2c = L2(size='64kB', assoc=8) + system.l2c.cpu_side = system.toL2Bus.port + + 
# connect l2c to membus + system.l2c.mem_side = system.membus.port + +which_port = 0 +# add L1 caches +for cpu in cpus: + if options.caches: + cpu.l1c = L1(size = '32kB', assoc = 4) + cpu.test = cpu.l1c.cpu_side + cpu.l1c.mem_side = system.toL2Bus.port + else: + cpu.test = system.membus.port + if which_port == 0: + system.funcmem.port = cpu.functional + which_port = 1 + else: + system.funcmem.functional = cpu.functional + + +# connect memory to membus +system.physmem.port = system.membus.port + + +# ----------------------- +# run simulation +# ----------------------- + +root = Root( system = system ) +if options.timing: + root.system.mem_mode = 'timing' +else: + root.system.mem_mode = 'atomic' + +# instantiate configuration +m5.instantiate(root) + +# simulate until program terminates +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate() + +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() diff --git a/configs/example/se.py b/configs/example/se.py index 6a941b9da..2e63e27da 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -37,6 +37,7 @@ m5.AddToPath('../common') parser = optparse.OptionParser() +# Benchmark options parser.add_option("-c", "--cmd", default="../../tests/test-progs/hello/bin/alpha/linux/hello", help="The binary to run in syscall emulation mode.") @@ -45,9 +46,35 @@ parser.add_option("-o", "--options", default="", string.") parser.add_option("-i", "--input", default="", help="A file of input to give to the binary.") + +# System options parser.add_option("-d", "--detailed", action="store_true") parser.add_option("-t", "--timing", action="store_true") +parser.add_option("--caches", action="store_true") + +# Run duration options parser.add_option("-m", "--maxtick", type="int") +parser.add_option("--maxtime", type="float") + +#Checkpointing options +###Note that performing checkpointing via python script files will override +###checkpoint instructions built into binaries. 
+parser.add_option("--take_checkpoints", action="store", type="string", + help="<M,N> will take checkpoint at cycle M and every N cycles \ + thereafter") +parser.add_option("--max_checkpoints", action="store", type="int", + help="the maximum number of checkpoints to drop", + default=5) +parser.add_option("--checkpoint_dir", action="store", type="string", + help="Place all checkpoints in this absolute directory") +parser.add_option("-r", "--checkpoint_restore", action="store", type="int", + help="restore from checkpoint <N>") + +#CPU Switching - default switch model generally goes from a checkpoint +#to a timing simple CPU with caches to warm up, then to detailed CPU for +#data measurement +parser.add_option("-s", "--standard_switch", action="store_true", + help="switch from one cpu mode to another") (options, args) = parser.parse_args() @@ -55,6 +82,13 @@ if args: print "Error: script doesn't take any positional arguments" sys.exit(1) +class MyCache(BaseCache): + assoc = 2 + block_size = 64 + latency = 1 + mshrs = 10 + tgts_per_mshr = 5 + process = LiveProcess() process.executable = options.cmd process.cmd = options.cmd + " " + options.options @@ -93,25 +127,149 @@ cpu.workload = process cpu.cpu_id = 0 system = System(cpu = cpu, - physmem = PhysicalMemory(), + physmem = PhysicalMemory(range=AddrRange("512MB")), membus = Bus()) system.physmem.port = system.membus.port system.cpu.connectMemPorts(system.membus) system.cpu.mem = system.physmem +system.cpu.clock = '2GHz' +if options.caches and not options.standard_switch: + system.cpu.addPrivateSplitL1Caches(MyCache(size = '32kB'), + MyCache(size = '64kB')) root = Root(system = system) if options.timing or options.detailed: root.system.mem_mode = 'timing' +if options.standard_switch: + switch_cpu = TimingSimpleCPU(defer_registration=True, cpu_id=1) + switch_cpu1 = DerivO3CPU(defer_registration=True, cpu_id=2) + switch_cpu.system = system + switch_cpu1.system = system + switch_cpu.clock = cpu.clock + switch_cpu1.clock = 
cpu.clock + if options.caches: + switch_cpu.addPrivateSplitL1Caches(MyCache(size = '32kB'), + MyCache(size = '64kB')) + + switch_cpu.workload = process + switch_cpu1.workload = process + switch_cpu.mem = system.physmem + switch_cpu1.mem = system.physmem + switch_cpu.connectMemPorts(system.membus) + root.switch_cpu = switch_cpu + root.switch_cpu1 = switch_cpu1 + switch_cpu_list = [(system.cpu, switch_cpu)] + switch_cpu_list1 = [(switch_cpu, switch_cpu1)] + # instantiate configuration m5.instantiate(root) -# simulate until program terminates +if options.checkpoint_dir: + cptdir = options.checkpoint_dir +else: + cptdir = getcwd() + +if options.checkpoint_restore: + from os.path import isdir + from os import listdir, getcwd + import re + + if not isdir(cptdir): + m5.panic("checkpoint dir %s does not exist!" % cptdir) + + dirs = listdir(cptdir) + expr = re.compile('cpt.([0-9]*)') + cpts = [] + for dir in dirs: + match = expr.match(dir) + if match: + cpts.append(match.group(1)) + + cpts.sort(lambda a,b: cmp(long(a), long(b))) + + if options.checkpoint_restore > len(cpts): + m5.panic('Checkpoint %d not found' % options.checkpoint_restore) + + print "restoring checkpoint from ","/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]]) + m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]])) + +if options.standard_switch: + exit_event = m5.simulate(10000) + ## when you change to Timing (or Atomic), you halt the system given + ## as argument. When you are finished with the system changes + ## (including switchCpus), you must resume the system manually. + ## You DON'T need to resume after just switching CPUs if you haven't + ## changed anything on the system level. 
+ m5.changeToTiming(system) + m5.switchCpus(switch_cpu_list) + m5.resume(system) + + exit_event = m5.simulate(500000000000) + m5.switchCpus(switch_cpu_list1) + if options.maxtick: - exit_event = m5.simulate(options.maxtick) + maxtick = options.maxtick +elif options.maxtime: + simtime = int(options.maxtime * root.clock.value) + print "simulating for: ", simtime + maxtick = simtime else: - exit_event = m5.simulate() + maxtick = -1 + +num_checkpoints = 0 + +exit_cause = '' + +if options.take_checkpoints: + [when, period] = options.take_checkpoints.split(",", 1) + when = int(when) + period = int(period) + + exit_event = m5.simulate(when) + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(when - m5.curTick()) + + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, "/".join([cptdir, "cpt.%d"])) + num_checkpoints += 1 + + sim_ticks = when + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + while num_checkpoints < options.max_checkpoints: + if (sim_ticks + period) > maxtick and maxtick != -1: + exit_event = m5.simulate(maxtick - sim_ticks) + exit_cause = exit_event.getCause() + break + else: + exit_event = m5.simulate(period) + sim_ticks += period + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(sim_ticks - m5.curTick()) + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, "/".join([cptdir, "cpt.%d"])) + num_checkpoints += 1 + +else: #no checkpoints being taken via this script + exit_event = m5.simulate(maxtick) + + while exit_event.getCause() == "checkpoint": + m5.checkpoint(root, "/".join([cptdir, "cpt.%d"])) + num_checkpoints += 1 + if num_checkpoints == options.max_checkpoints: + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + break + + if maxtick == -1: + exit_event = m5.simulate(maxtick) + else: + exit_event = m5.simulate(maxtick - m5.curTick()) + + exit_cause = exit_event.getCause() + +if exit_cause == '': + exit_cause = exit_event.getCause() +print 
'Exiting @ cycle', m5.curTick(), 'because ', exit_cause -print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() diff --git a/configs/splash2/run.py b/configs/splash2/run.py index ebbe14939..93b166d77 100644 --- a/configs/splash2/run.py +++ b/configs/splash2/run.py @@ -1,4 +1,4 @@ -# Copyright (c) 2005 The Regents of The University of Michigan +# Copyright (c) 2005-2006 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -26,54 +26,243 @@ # # Authors: Ron Dreslinski -from m5 import * -import Splash2 - -if 'SYSTEM' not in env: - panic("The SYSTEM environment variable must be set!\ne.g -ESYSTEM=Detailed\n") - -if env['SYSTEM'] == 'Simple': - from SimpleConfig import * - BaseCPU.workload = Parent.workload - SimpleStandAlone.cpu = [ CPU() for i in xrange(int(env['NP'])) ] - root = SimpleStandAlone() -elif env['SYSTEM'] == 'Detailed': - from DetailedConfig import * - BaseCPU.workload = Parent.workload - DetailedStandAlone.cpu = [ DetailedCPU() for i in xrange(int(env['NP'])) ] - root = DetailedStandAlone() +# Splash2 Run Script +# + +import m5 +from m5.objects import * +import os, optparse, sys +m5.AddToPath('../common') + +# -------------------- +# Define Command Line Options +# ==================== + +parser = optparse.OptionParser() + +parser.add_option("-d", "--detailed", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("-n", "--numcpus", + help="Number of cpus in total", type="int") +parser.add_option("-f", "--frequency", + default = "1GHz", + help="Frequency of each CPU") +parser.add_option("-p", "--protocol", + default="moesi", + help="The coherence protocol to use for the L1'a (i.e. 
MOESI, MOSI)") +parser.add_option("--l1size", + default = "32kB") +parser.add_option("--l1latency", + default = 1) +parser.add_option("--l2size", + default = "256kB") +parser.add_option("--l2latency", + default = 10) +parser.add_option("--rootdir", + help="ROot directory of Splash2", + default="/dist/splash2/codes") +parser.add_option("-b", "--benchmark", + help="Splash 2 benchmark to run") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +if not options.numcpus: + print "Specify the number of cpus with -n" + sys.exit(1) + +# -------------------- +# Define Splash2 Benchmarks +# ==================== +class Cholesky(LiveProcess): + executable = options.rootdir + '/kernels/cholesky/CHOLESKY' + cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\ + + options.rootdir + '/kernels/cholesky/inputs/tk23.O' + +class FFT(LiveProcess): + executable = options.rootdir + '/kernels/fft/FFT' + cmd = 'FFT -p' + str(options.numcpus) + ' -m18' + +class LU_contig(LiveProcess): + executable = options.rootdir + '/kernels/lu/contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class LU_noncontig(LiveProcess): + executable = options.rootdir + '/kernels/lu/non_contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class Radix(LiveProcess): + executable = options.rootdir + '/kernels/radix/RADIX' + cmd = 'RADIX -n524288 -p' + str(options.numcpus) + +class Barnes(LiveProcess): + executable = options.rootdir + '/apps/barnes/BARNES' + cmd = 'BARNES' + input = options.rootdir + '/apps/barnes/input.p' + str(options.numcpus) + +class FMM(LiveProcess): + executable = options.rootdir + '/apps/fmm/FMM' + cmd = 'FMM' + input = options.rootdir + '/apps/fmm/inputs/input.2048.p' + str(options.numcpus) + +class Ocean_contig(LiveProcess): + executable = options.rootdir + '/apps/ocean/contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Ocean_noncontig(LiveProcess): + executable = 
options.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Raytrace(LiveProcess): + executable = options.rootdir + '/apps/raytrace/RAYTRACE' + cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \ + + options.rootdir + '/apps/raytrace/inputs/teapot.env' + +class Water_nsquared(LiveProcess): + executable = options.rootdir + '/apps/water-nsquared/WATER-NSQUARED' + cmd = 'WATER-NSQUARED' + input = options.rootdir + '/apps/water-nsquared/input.p' + str(options.numcpus) + +class Water_spatial(LiveProcess): + executable = options.rootdir + '/apps/water-spatial/WATER-SPATIAL' + cmd = 'WATER-SPATIAL' + input = options.rootdir + '/apps/water-spatial/input.p' + str(options.numcpus) + + +# -------------------- +# Base L1 Cache Definition +# ==================== + +class L1(BaseCache): + latency = options.l1latency + block_size = 64 + mshrs = 12 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol=options.protocol) + +# ---------------------- +# Base L2 Cache Definition +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = options.l2latency + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +# ---------------------- +# Define the cpus +# ---------------------- + +busFrequency = Frequency(options.frequency) + +if options.timing: + cpus = [TimingSimpleCPU(cpu_id = i, + clock=options.frequency) + for i in xrange(options.numcpus)] +elif options.detailed: + cpus = [DerivO3CPU(cpu_id = i, + clock=options.frequency) + for i in xrange(options.numcpus)] else: - panic("The SYSTEM environment variable was set to something improper.\n Use Simple or Detailed\n") - -if 'BENCHMARK' not in env: - panic("The BENCHMARK environment variable must be set!\ne.g. 
-EBENCHMARK=Cholesky\n") - -if env['BENCHMARK'] == 'Cholesky': - root.workload = Splash2.Cholesky() -elif env['BENCHMARK'] == 'FFT': - root.workload = Splash2.FFT() -elif env['BENCHMARK'] == 'LUContig': - root.workload = Splash2.LU_contig() -elif env['BENCHMARK'] == 'LUNoncontig': - root.workload = Splash2.LU_noncontig() -elif env['BENCHMARK'] == 'Radix': - root.workload = Splash2.Radix() -elif env['BENCHMARK'] == 'Barnes': - root.workload = Splash2.Barnes() -elif env['BENCHMARK'] == 'FMM': - root.workload = Splash2.FMM() -elif env['BENCHMARK'] == 'OceanContig': - root.workload = Splash2.Ocean_contig() -elif env['BENCHMARK'] == 'OceanNoncontig': - root.workload = Splash2.Ocean_noncontig() -elif env['BENCHMARK'] == 'Raytrace': - root.workload = Splash2.Raytrace() -elif env['BENCHMARK'] == 'WaterNSquared': - root.workload = Splash2.Water_nsquared() -elif env['BENCHMARK'] == 'WaterSpatial': - root.workload = Splash2.Water_spatial() + cpus = [AtomicSimpleCPU(cpu_id = i, + clock=options.frequency) + for i in xrange(options.numcpus)] + +# ---------------------- +# Create a system, and add system wide objects +# ---------------------- +system = System(cpu = cpus, physmem = PhysicalMemory(), + membus = Bus(clock = busFrequency)) + +system.toL2bus = Bus(clock = busFrequency) +system.l2 = L2(size = options.l2size, assoc = 8) + +# ---------------------- +# Connect the L2 cache and memory together +# ---------------------- + +system.physmem.port = system.membus.port +system.l2.cpu_side = system.toL2bus.port +system.l2.mem_side = system.membus.port + +# ---------------------- +# Connect the L2 cache and clusters together +# ---------------------- +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = options.l1size, assoc = 1), + L1(size = options.l1size, assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2bus) + + +# ---------------------- +# Define the root +# ---------------------- + +root = 
Root(system = system) + +# -------------------- +# Pick the correct Splash2 Benchmarks +# ==================== +if options.benchmark == 'Cholesky': + root.workload = Cholesky() +elif options.benchmark == 'FFT': + root.workload = FFT() +elif options.benchmark == 'LUContig': + root.workload = LU_contig() +elif options.benchmark == 'LUNoncontig': + root.workload = LU_noncontig() +elif options.benchmark == 'Radix': + root.workload = Radix() +elif options.benchmark == 'Barnes': + root.workload = Barnes() +elif options.benchmark == 'FMM': + root.workload = FMM() +elif options.benchmark == 'OceanContig': + root.workload = Ocean_contig() +elif options.benchmark == 'OceanNoncontig': + root.workload = Ocean_noncontig() +elif options.benchmark == 'Raytrace': + root.workload = Raytrace() +elif options.benchmark == 'WaterNSquared': + root.workload = Water_nsquared() +elif options.benchmark == 'WaterSpatial': + root.workload = Water_spatial() else: - panic("The BENCHMARK environment variable was set to something" \ + panic("The --benchmark environment variable was set to something" \ +" improper.\nUse Cholesky, FFT, LUContig, LUNoncontig, Radix" \ +", Barnes, FMM, OceanContig,\nOceanNoncontig, Raytrace," \ +" WaterNSquared, or WaterSpatial\n") + +# -------------------- +# Assign the workload to the cpus +# ==================== + +for cpu in cpus: + cpu.workload = root.workload + +# ---------------------- +# Run the simulation +# ---------------------- + +if options.timing or options.detailed: + root.system.mem_mode = 'timing' + +# instantiate configuration +m5.instantiate(root) + +# simulate until program terminates +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate() + +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() + diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc index eef4361fd..7179bf025 100644 --- a/src/arch/alpha/faults.cc +++ b/src/arch/alpha/faults.cc @@ -194,7 +194,8 @@ void 
PageTableFault::invoke(ThreadContext *tc) // We've accessed the next page if (vaddr > p->stack_min - PageBytes) { - warn("Increasing stack %#x:%#x to %#x:%#x because of access to %#x", + DPRINTF(Stack, + "Increasing stack %#x:%#x to %#x:%#x because of access to %#x", p->stack_min, p->stack_base, p->stack_min - PageBytes, p->stack_base, vaddr); p->stack_min -= PageBytes; @@ -202,6 +203,7 @@ void PageTableFault::invoke(ThreadContext *tc) fatal("Over max stack size for one thread\n"); p->pTable->allocate(p->stack_min, PageBytes); } else { + warn("Page fault on address %#x\n", vaddr); FaultBase::invoke(tc); } } diff --git a/src/arch/alpha/utility.hh b/src/arch/alpha/utility.hh index d3ccc0444..0304d1c3a 100644 --- a/src/arch/alpha/utility.hh +++ b/src/arch/alpha/utility.hh @@ -37,16 +37,17 @@ #include "arch/alpha/isa_traits.hh" #include "arch/alpha/regfile.hh" #include "base/misc.hh" +#include "cpu/thread_context.hh" namespace AlphaISA { static inline ExtMachInst - makeExtMI(MachInst inst, const uint64_t &pc) { + makeExtMI(MachInst inst, ThreadContext * xc) { #if FULL_SYSTEM ExtMachInst ext_inst = inst; - if (pc && 0x1) - return ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32); + if (xc->readPC() && 0x1) + return ext_inst|=(static_cast<ExtMachInst>(xc->readPC() & 0x1) << 32); else return ext_inst; #else diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 4d522e18a..b235398f1 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -1636,7 +1636,7 @@ opClassRE = re.compile(r'.*Op|No_OpClass') class InstObjParams: def __init__(self, mnem, class_name, base_class = '', - code = None, opt_args = [], *extras): + code = None, opt_args = [], extras = {}): self.mnemonic = mnem self.class_name = class_name self.base_class = base_class @@ -1648,13 +1648,23 @@ class InstObjParams: else: origCode = code codeBlock = CodeBlock(code) - compositeCode = '\n'.join([origCode] + - [pair[1] for pair in extras]) + stringExtras = {} + otherExtras = {} + for (k, 
v) in extras.items(): + if type(v) == str: + stringExtras[k] = v + else: + otherExtras[k] = v + compositeCode = "\n".join([origCode] + stringExtras.values()) + # compositeCode = '\n'.join([origCode] + + # [pair[1] for pair in extras]) compositeBlock = CodeBlock(compositeCode) for code_attr in compositeBlock.__dict__.keys(): setattr(self, code_attr, getattr(compositeBlock, code_attr)) - for (key, snippet) in extras: + for (key, snippet) in stringExtras.items(): setattr(self, key, CodeBlock(snippet).code) + for (key, item) in otherExtras.items(): + setattr(self, key, item) self.code = codeBlock.code self.orig_code = origCode else: diff --git a/src/arch/mips/utility.hh b/src/arch/mips/utility.hh index ae2fe1aea..56689ba4d 100644 --- a/src/arch/mips/utility.hh +++ b/src/arch/mips/utility.hh @@ -35,6 +35,7 @@ #include "arch/mips/types.hh" #include "base/misc.hh" #include "config/full_system.hh" +#include "cpu/thread_context.hh" //XXX This is needed for size_t. We should use something other than size_t //#include "kern/linux/linux.hh" #include "sim/host.hh" @@ -86,11 +87,11 @@ namespace MipsISA { } static inline ExtMachInst - makeExtMI(MachInst inst, const uint64_t &pc) { + makeExtMI(MachInst inst, ThreadContext * xc) { #if FULL_SYSTEM ExtMachInst ext_inst = inst; - if (pc && 0x1) - return ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32); + if (xc->readPC() && 0x1) + return ext_inst|=(static_cast<ExtMachInst>(xc->readPC() & 0x1) << 32); else return ext_inst; #else diff --git a/src/arch/sparc/SConscript b/src/arch/sparc/SConscript index 66f2b57e0..e317502e0 100644 --- a/src/arch/sparc/SConscript +++ b/src/arch/sparc/SConscript @@ -44,6 +44,7 @@ Import('env') # Base sources used by all configurations. 
base_sources = Split(''' + asi.cc faults.cc floatregfile.cc intregfile.cc diff --git a/src/arch/sparc/asi.cc b/src/arch/sparc/asi.cc new file mode 100644 index 000000000..00c9e041e --- /dev/null +++ b/src/arch/sparc/asi.cc @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#include "arch/sparc/asi.hh" + +namespace SparcISA +{ + bool AsiIsBlock(ASI asi) + { + return + (asi == ASI_BLK_AIUP) || + (asi == ASI_BLK_AIUS) || + (asi == ASI_BLK_AIUPL) || + (asi == ASI_BLK_AIUSL) || + (asi == ASI_BLK_P) || + (asi == ASI_BLK_S) || + (asi == ASI_BLK_PL) || + (asi == ASI_BLK_SL); + } + + bool AsiIsPrimary(ASI asi) + { + return + (asi == ASI_AIUP) || + (asi == ASI_BLK_AIUP) || + (asi == ASI_AIUPL) || + (asi == ASI_BLK_AIUPL) || + (asi == ASI_LDTX_AIUP) || + (asi == ASI_LDTX_AIUPL) || + (asi == ASI_P) || + (asi == ASI_PNF) || + (asi == ASI_PL) || + (asi == ASI_PNFL) || + (asi == ASI_PST8_P) || + (asi == ASI_PST16_P) || + (asi == ASI_PST32_P) || + (asi == ASI_PST8_PL) || + (asi == ASI_PST16_PL) || + (asi == ASI_PST32_PL) || + (asi == ASI_FL8_P) || + (asi == ASI_FL16_P) || + (asi == ASI_FL8_PL) || + (asi == ASI_FL16_PL) || + (asi == ASI_LDTX_P) || + (asi == ASI_LDTX_PL) || + (asi == ASI_BLK_P) || + (asi == ASI_BLK_PL); + } + + bool AsiIsSecondary(ASI asi) + { + return + (asi == ASI_AIUS) || + (asi == ASI_BLK_AIUS) || + (asi == ASI_AIUSL) || + (asi == ASI_BLK_AIUSL) || + (asi == ASI_LDTX_AIUS) || + (asi == ASI_LDTX_AIUSL) || + (asi == ASI_S) || + (asi == ASI_SNF) || + (asi == ASI_SL) || + (asi == ASI_SNFL) || + (asi == ASI_PST8_S) || + (asi == ASI_PST16_S) || + (asi == ASI_PST32_S) || + (asi == ASI_PST8_SL) || + (asi == ASI_PST16_SL) || + (asi == ASI_PST32_SL) || + (asi == ASI_FL8_S) || + (asi == ASI_FL16_S) || + (asi == ASI_FL8_SL) || + (asi == ASI_FL16_SL) || + (asi == ASI_LDTX_S) || + (asi == ASI_LDTX_SL) || + (asi == ASI_BLK_S) || + (asi == ASI_BLK_SL); + } + + bool AsiNucleus(ASI asi) + { + return + (asi == ASI_N) || + (asi == ASI_NL) || + (asi == ASI_LDTX_N) || + (asi == ASI_LDTX_NL); + } + + bool AsiIsAsIfUser(ASI asi) + { + return + (asi == ASI_AIUP) || + (asi == ASI_AIUS) || + (asi == ASI_BLK_AIUP) || + (asi == ASI_BLK_AIUS) || + (asi == ASI_AIUPL) || + (asi == ASI_AIUSL) || + (asi == ASI_BLK_AIUPL) || + 
(asi == ASI_BLK_AIUSL) || + (asi == ASI_LDTX_AIUP) || + (asi == ASI_LDTX_AIUS) || + (asi == ASI_LDTX_AIUPL) || + (asi == ASI_LDTX_AIUSL); + } + + bool AsiIsIO(ASI asi) + { + return + (asi == ASI_REAL_IO) || + (asi == ASI_REAL_IO_L); + } + + bool AsiIsReal(ASI asi) + { + return + (asi == ASI_REAL) || + (asi == ASI_REAL_IO) || + (asi == ASI_REAL_L) || + (asi == ASI_REAL_IO_L) || + (asi == ASI_LDTX_REAL) || + (asi == ASI_LDTX_REAL_L) || + (asi == ASI_MMU_REAL); + } + + bool AsiIsLittle(ASI asi) + { + return + (asi == ASI_NL) || + (asi == ASI_AIUPL) || + (asi == ASI_AIUSL) || + (asi == ASI_REAL_L) || + (asi == ASI_REAL_IO_L) || + (asi == ASI_BLK_AIUPL) || + (asi == ASI_BLK_AIUSL) || + (asi == ASI_LDTX_AIUPL) || + (asi == ASI_LDTX_AIUSL) || + (asi == ASI_LDTX_REAL_L) || + (asi == ASI_LDTX_NL) || + (asi == ASI_PL) || + (asi == ASI_SL) || + (asi == ASI_PNFL) || + (asi == ASI_SNFL) || + (asi == ASI_PST8_PL) || + (asi == ASI_PST8_SL) || + (asi == ASI_PST16_PL) || + (asi == ASI_PST16_SL) || + (asi == ASI_PST32_PL) || + (asi == ASI_PST32_SL) || + (asi == ASI_FL8_PL) || + (asi == ASI_FL8_SL) || + (asi == ASI_FL16_PL) || + (asi == ASI_FL16_SL) || + (asi == ASI_LDTX_PL) || + (asi == ASI_LDTX_SL) || + (asi == ASI_BLK_PL) || + (asi == ASI_BLK_SL); + } + + bool AsiIsTwin(ASI asi) + { + return + (asi == ASI_LDTX_AIUP) || + (asi == ASI_LDTX_AIUS) || + (asi == ASI_LDTX_REAL) || + (asi == ASI_LDTX_N) || + (asi == ASI_LDTX_AIUPL) || + (asi == ASI_LDTX_AIUSL) || + (asi == ASI_LDTX_REAL_L) || + (asi == ASI_LDTX_NL) || + (asi == ASI_LDTX_P) || + (asi == ASI_LDTX_S) || + (asi == ASI_LDTX_PL) || + (asi == ASI_LDTX_SL); + } + + bool AsiIsPartialStore(ASI asi) + { + return + (asi == ASI_PST8_P) || + (asi == ASI_PST8_S) || + (asi == ASI_PST16_P) || + (asi == ASI_PST16_S) || + (asi == ASI_PST32_P) || + (asi == ASI_PST32_S) || + (asi == ASI_PST8_PL) || + (asi == ASI_PST8_SL) || + (asi == ASI_PST16_PL) || + (asi == ASI_PST16_SL) || + (asi == ASI_PST32_PL) || + (asi == ASI_PST32_SL); + } + + bool 
AsiIsFloatingLoad(ASI asi) + { + return + (asi == ASI_FL8_P) || + (asi == ASI_FL8_S) || + (asi == ASI_FL16_P) || + (asi == ASI_FL16_S) || + (asi == ASI_FL8_PL) || + (asi == ASI_FL8_SL) || + (asi == ASI_FL16_PL) || + (asi == ASI_FL16_SL); + } + + bool AsiIsNoFault(ASI asi) + { + return + (asi == ASI_PNF) || + (asi == ASI_SNF) || + (asi == ASI_PNFL) || + (asi == ASI_SNFL); + } + + bool AsiIsScratchPad(ASI asi) + { + return + (asi == ASI_SCRATCHPAD) || + (asi == ASI_HYP_SCRATCHPAD); + } + + bool AsiIsCmt(ASI asi) + { + return + (asi == ASI_CMT_PER_STRAND) || + (asi == ASI_CMT_SHARED); + } + + bool AsiIsQueue(ASI asi) + { + return asi == ASI_QUEUE; + } + + bool AsiIsDtlb(ASI asi) + { + return + (asi == ASI_DTLB_DATA_IN_REG) || + (asi == ASI_DTLB_DATA_ACCESS_REG) || + (asi == ASI_DTLB_TAG_READ_REG); + } + + bool AsiIsMmu(ASI asi) + { + return + (asi == ASI_MMU_CONTEXTID) || + (asi == ASI_IMMU) || + (asi == ASI_MMU_REAL) || + (asi == ASI_MMU) || + (asi == ASI_DMMU) || + (asi == ASI_UMMU) || + (asi == ASI_DMMU_DEMAP); + } +} diff --git a/src/arch/sparc/asi.hh b/src/arch/sparc/asi.hh index 482e077e0..876567225 100644 --- a/src/arch/sparc/asi.hh +++ b/src/arch/sparc/asi.hh @@ -156,23 +156,23 @@ namespace SparcISA ASI_PST32_SL = 0xCD, ASI_PST32_SECONDARY_LITTLE = ASI_PST32_SL, //0xCE-0xCF implementation dependent - ASI_PL8_P = 0xD0, - ASI_PL8_PRIMARY = ASI_PL8_P, - ASI_PL8_S = 0xD1, - ASI_PL8_SECONDARY = ASI_PL8_S, - ASI_PL16_P = 0xD2, - ASI_PL16_PRIMARY = ASI_PL16_P, - ASI_PL16_S = 0xD3, - ASI_PL16_SECONDARY = ASI_PL16_S, + ASI_FL8_P = 0xD0, + ASI_FL8_PRIMARY = ASI_FL8_P, + ASI_FL8_S = 0xD1, + ASI_FL8_SECONDARY = ASI_FL8_S, + ASI_FL16_P = 0xD2, + ASI_FL16_PRIMARY = ASI_FL16_P, + ASI_FL16_S = 0xD3, + ASI_FL16_SECONDARY = ASI_FL16_S, //0xD4-0xD7 implementation dependent - ASI_PL8_PL = 0xD8, - ASI_PL8_PRIMARY_LITTLE = ASI_PL8_PL, - ASI_PL8_SL = 0xD9, - ASI_PL8_SECONDARY_LITTLE = ASI_PL8_SL, - ASI_PL16_PL = 0xDA, - ASI_PL16_PRIMARY_LITTLE = ASI_PL16_PL, - ASI_PL16_SL = 0xDB, - 
ASI_PL16_SECONDARY_LITTLE = ASI_PL16_SL, + ASI_FL8_PL = 0xD8, + ASI_FL8_PRIMARY_LITTLE = ASI_FL8_PL, + ASI_FL8_SL = 0xD9, + ASI_FL8_SECONDARY_LITTLE = ASI_FL8_SL, + ASI_FL16_PL = 0xDA, + ASI_FL16_PRIMARY_LITTLE = ASI_FL16_PL, + ASI_FL16_SL = 0xDB, + ASI_FL16_SECONDARY_LITTLE = ASI_FL16_SL, //0xDC-0xDF implementation dependent //0xE0-0xE1 reserved ASI_LDTX_P = 0xE2, @@ -193,9 +193,30 @@ namespace SparcISA ASI_BLK_PL = 0xF8, ASI_BLOCK_PRIMARY_LITTLE = ASI_BLK_PL, ASI_BLK_SL = 0xF9, - ASI_BLOCK_SECONDARY_LITTLE = ASI_BLK_SL + ASI_BLOCK_SECONDARY_LITTLE = ASI_BLK_SL, //0xFA-0xFF implementation dependent + MAX_ASI = 0xFF }; + + //Functions that classify an asi + bool AsiIsBlock(ASI); + bool AsiIsPrimary(ASI); + bool AsiIsSecondary(ASI); + bool AsiIsNucleus(ASI); + bool AsiIsAsIfUser(ASI); + bool AsiIsIO(ASI); + bool AsiIsReal(ASI); + bool AsiIsLittle(ASI); + bool AsiIsTwin(ASI); + bool AsiIsPartialStore(ASI); + bool AsiIsFloatingLoad(ASI); + bool AsiIsNoFault(ASI); + bool AsiIsScratchPad(ASI); + bool AsiIsCmt(ASI); + bool AsiIsQueue(ASI); + bool AsiIsDtlb(ASI); + bool AsiIsMmu(ASI); + }; #endif // __ARCH_SPARC_TLB_HH__ diff --git a/src/arch/sparc/isa/base.isa b/src/arch/sparc/isa/base.isa index b518265aa..a4c022411 100644 --- a/src/arch/sparc/isa/base.isa +++ b/src/arch/sparc/isa/base.isa @@ -77,7 +77,7 @@ output header {{ protected: // Constructor. SparcStaticInst(const char *mnem, - MachInst _machInst, OpClass __opClass) + ExtMachInst _machInst, OpClass __opClass) : StaticInst(mnem, _machInst, __opClass) { } diff --git a/src/arch/sparc/isa/bitfields.isa b/src/arch/sparc/isa/bitfields.isa index 27f52fa29..372f5c4ef 100644 --- a/src/arch/sparc/isa/bitfields.isa +++ b/src/arch/sparc/isa/bitfields.isa @@ -76,3 +76,7 @@ def bitfield SIMM11 <10:0>; def bitfield SIMM13 <12:0>; def bitfield SW_TRAP <7:0>; def bitfield X <12>; + +// Extended bitfields which aren't part of the actual instruction. 
+ +def bitfield EXT_ASI <39:32>; diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa index 1384b21a0..45d3616d9 100644 --- a/src/arch/sparc/isa/decoder.isa +++ b/src/arch/sparc/isa/decoder.isa @@ -441,7 +441,7 @@ decode OP default Unknown::unknown() 0x34: decode OPF{ format BasicOperate{ 0x01: fmovs({{ - Frd.uw = Frs2.uw; + Frds.uw = Frs2s.uw; //fsr.ftt = fsr.cexc = 0 Fsr &= ~(7 << 14); Fsr &= ~(0x1F); @@ -454,7 +454,7 @@ decode OP default Unknown::unknown() }}); 0x03: Trap::fmovq({{fault = new FpDisabled;}}); 0x05: fnegs({{ - Frd.uw = Frs2.uw ^ (1UL << 31); + Frds.uw = Frs2s.uw ^ (1UL << 31); //fsr.ftt = fsr.cexc = 0 Fsr &= ~(7 << 14); Fsr &= ~(0x1F); @@ -467,7 +467,7 @@ decode OP default Unknown::unknown() }}); 0x07: Trap::fnegq({{fault = new FpDisabled;}}); 0x09: fabss({{ - Frd.uw = ((1UL << 31) - 1) & Frs2.uw; + Frds.uw = ((1UL << 31) - 1) & Frs2s.uw; //fsr.ftt = fsr.cexc = 0 Fsr &= ~(7 << 14); Fsr &= ~(0x1F); @@ -479,55 +479,55 @@ decode OP default Unknown::unknown() Fsr &= ~(0x1F); }}); 0x0B: Trap::fabsq({{fault = new FpDisabled;}}); - 0x29: fsqrts({{Frd.sf = sqrt(Frs2.sf);}}); + 0x29: fsqrts({{Frds.sf = sqrt(Frs2s.sf);}}); 0x2A: fsqrtd({{Frd.df = sqrt(Frs2.df);}}); 0x2B: Trap::fsqrtq({{fault = new FpDisabled;}}); - 0x41: fadds({{Frd.sf = Frs1.sf + Frs2.sf;}}); + 0x41: fadds({{Frds.sf = Frs1s.sf + Frs2s.sf;}}); 0x42: faddd({{Frd.df = Frs1.df + Frs2.df;}}); 0x43: Trap::faddq({{fault = new FpDisabled;}}); - 0x45: fsubs({{Frd.sf = Frs1.sf - Frs2.sf;}}); + 0x45: fsubs({{Frds.sf = Frs1s.sf - Frs2s.sf;}}); 0x46: fsubd({{Frd.df = Frs1.df - Frs2.df;}}); 0x47: Trap::fsubq({{fault = new FpDisabled;}}); - 0x49: fmuls({{Frd.sf = Frs1.sf * Frs2.sf;}}); + 0x49: fmuls({{Frds.sf = Frs1s.sf * Frs2s.sf;}}); 0x4A: fmuld({{Frd.df = Frs1.df * Frs2.df;}}); 0x4B: Trap::fmulq({{fault = new FpDisabled;}}); - 0x4D: fdivs({{Frd.sf = Frs1.sf / Frs2.sf;}}); + 0x4D: fdivs({{Frds.sf = Frs1s.sf / Frs2s.sf;}}); 0x4E: fdivd({{Frd.df = Frs1.df / Frs2.df;}}); 0x4F: 
Trap::fdivq({{fault = new FpDisabled;}}); - 0x69: fsmuld({{Frd.df = Frs1.sf * Frs2.sf;}}); + 0x69: fsmuld({{Frd.df = Frs1s.sf * Frs2s.sf;}}); 0x6E: Trap::fdmulq({{fault = new FpDisabled;}}); 0x81: fstox({{ - Frd.df = (double)static_cast<int64_t>(Frs2.sf); + Frd.df = (double)static_cast<int64_t>(Frs2s.sf); }}); 0x82: fdtox({{ Frd.df = (double)static_cast<int64_t>(Frs2.df); }}); 0x83: Trap::fqtox({{fault = new FpDisabled;}}); 0x84: fxtos({{ - Frd.sf = static_cast<float>((int64_t)Frs2.df); + Frds.sf = static_cast<float>((int64_t)Frs2.df); }}); 0x88: fxtod({{ Frd.df = static_cast<double>((int64_t)Frs2.df); }}); 0x8C: Trap::fxtoq({{fault = new FpDisabled;}}); 0xC4: fitos({{ - Frd.sf = static_cast<float>((int32_t)Frs2.sf); + Frds.sf = static_cast<float>((int32_t)Frs2s.sf); }}); - 0xC6: fdtos({{Frd.sf = Frs2.df;}}); + 0xC6: fdtos({{Frds.sf = Frs2.df;}}); 0xC7: Trap::fqtos({{fault = new FpDisabled;}}); 0xC8: fitod({{ - Frd.df = static_cast<double>((int32_t)Frs2.sf); + Frd.df = static_cast<double>((int32_t)Frs2s.sf); }}); - 0xC9: fstod({{Frd.df = Frs2.sf;}}); + 0xC9: fstod({{Frd.df = Frs2s.sf;}}); 0xCB: Trap::fqtod({{fault = new FpDisabled;}}); 0xCC: Trap::fitoq({{fault = new FpDisabled;}}); 0xCD: Trap::fstoq({{fault = new FpDisabled;}}); 0xCE: Trap::fdtoq({{fault = new FpDisabled;}}); 0xD1: fstoi({{ - Frd.sf = (float)static_cast<int32_t>(Frs2.sf); + Frds.sf = (float)static_cast<int32_t>(Frs2s.sf); }}); 0xD2: fdtoi({{ - Frd.sf = (float)static_cast<int32_t>(Frs2.df); + Frds.sf = (float)static_cast<int32_t>(Frs2.df); }}); 0xD3: Trap::fqtoi({{fault = new FpDisabled;}}); default: Trap::fpop1({{fault = new FpDisabled;}}); @@ -620,7 +620,7 @@ decode OP default Unknown::unknown() 0x56: Trap::fpsub32({{fault = new IllegalInstruction;}}); 0x57: Trap::fpsub32s({{fault = new IllegalInstruction;}}); 0x60: BasicOperate::fzero({{Frd.df = 0;}}); - 0x61: BasicOperate::fzeros({{Frd.sf = 0;}}); + 0x61: BasicOperate::fzeros({{Frds.sf = 0;}}); 0x62: Trap::fnor({{fault = new 
IllegalInstruction;}}); 0x63: Trap::fnors({{fault = new IllegalInstruction;}}); 0x64: Trap::fandnot2({{fault = new IllegalInstruction;}}); @@ -629,7 +629,7 @@ decode OP default Unknown::unknown() Frd.df = (double)(~((uint64_t)Frs2.df)); }}); 0x67: BasicOperate::fnot2s({{ - Frd.sf = (float)(~((uint32_t)Frs2.sf)); + Frds.sf = (float)(~((uint32_t)Frs2s.sf)); }}); 0x68: Trap::fandnot1({{fault = new IllegalInstruction;}}); 0x69: Trap::fandnot1s({{fault = new IllegalInstruction;}}); @@ -637,7 +637,7 @@ decode OP default Unknown::unknown() Frd.df = (double)(~((uint64_t)Frs1.df)); }}); 0x6B: BasicOperate::fnot1s({{ - Frd.sf = (float)(~((uint32_t)Frs1.sf)); + Frds.sf = (float)(~((uint32_t)Frs1s.sf)); }}); 0x6C: Trap::fxor({{fault = new IllegalInstruction;}}); 0x6D: Trap::fxors({{fault = new IllegalInstruction;}}); @@ -820,92 +820,248 @@ decode OP default Unknown::unknown() } 0x3: decode OP3 { format Load { - 0x00: lduw({{Rd = Mem;}}, {{32}}); - 0x01: ldub({{Rd = Mem;}}, {{8}}); - 0x02: lduh({{Rd = Mem;}}, {{16}}); + 0x00: lduw({{Rd = Mem.uw;}}); + 0x01: ldub({{Rd = Mem.ub;}}); + 0x02: lduh({{Rd = Mem.uhw;}}); 0x03: ldd({{ - uint64_t val = Mem; + uint64_t val = Mem.udw; RdLow = val<31:0>; RdHigh = val<63:32>; - }}, {{64}}); + }}); } format Store { - 0x04: stw({{Mem = Rd.sw;}}, {{32}}); - 0x05: stb({{Mem = Rd.sb;}}, {{8}}); - 0x06: sth({{Mem = Rd.shw;}}, {{16}}); - 0x07: std({{Mem = RdLow<31:0> | (RdHigh<31:0> << 32);}}, {{64}}); + 0x04: stw({{Mem.uw = Rd.sw;}}); + 0x05: stb({{Mem.ub = Rd.sb;}}); + 0x06: sth({{Mem.uhw = Rd.shw;}}); + 0x07: std({{Mem.udw = RdLow<31:0> | (RdHigh<31:0> << 32);}}); } format Load { - 0x08: ldsw({{Rd = (int32_t)Mem;}}, {{32}}); - 0x09: ldsb({{Rd = (int8_t)Mem;}}, {{8}}); - 0x0A: ldsh({{Rd = (int16_t)Mem;}}, {{16}}); - 0x0B: ldx({{Rd = (int64_t)Mem;}}, {{64}}); + 0x08: ldsw({{Rd = (int32_t)Mem.sw;}}); + 0x09: ldsb({{Rd = (int8_t)Mem.sb;}}); + 0x0A: ldsh({{Rd = (int16_t)Mem.shw;}}); + 0x0B: ldx({{Rd = (int64_t)Mem.sdw;}}); 0x0D: ldstub({{ - Rd = Mem; 
- Mem = 0xFF; - }}, {{8}}); + Rd = Mem.ub; + Mem.ub = 0xFF; + }}); } - 0x0E: Store::stx({{Mem = Rd}}, {{64}}); - 0x0F: LoadStore::swap({{ - uint32_t temp = Rd; - Rd = Mem; - Mem = temp; - }}, {{32}}); + 0x0E: Store::stx({{Mem.udw = Rd}}); + 0x0F: LoadStore::swap( + {{*temp = Rd.uw; + Rd.uw = Mem.uw;}}, + {{Mem.uw = *temp;}}); format Load { - 0x10: lduwa({{Rd = Mem;}}, {{32}}); - 0x11: lduba({{Rd = Mem;}}, {{8}}); - 0x12: lduha({{Rd = Mem;}}, {{16}}); + 0x10: lduwa({{Rd = Mem.uw;}}); + 0x11: lduba({{Rd = Mem.ub;}}); + 0x12: lduha({{Rd = Mem.uhw;}}); 0x13: ldda({{ - uint64_t val = Mem; + uint64_t val = Mem.udw; RdLow = val<31:0>; RdHigh = val<63:32>; - }}, {{64}}); + }}); } format Store { - 0x14: stwa({{Mem = Rd;}}, {{32}}); - 0x15: stba({{Mem = Rd;}}, {{8}}); - 0x16: stha({{Mem = Rd;}}, {{16}}); - 0x17: stda({{Mem = RdLow<31:0> | RdHigh<31:0> << 32;}}, {{64}}); + 0x14: stwa({{Mem.uw = Rd;}}); + 0x15: stba({{Mem.ub = Rd;}}); + 0x16: stha({{Mem.uhw = Rd;}}); + 0x17: stda({{Mem.udw = RdLow<31:0> | RdHigh<31:0> << 32;}}); } format Load { - 0x18: ldswa({{Rd = (int32_t)Mem;}}, {{32}}); - 0x19: ldsba({{Rd = (int8_t)Mem;}}, {{8}}); - 0x1A: ldsha({{Rd = (int16_t)Mem;}}, {{16}}); - 0x1B: ldxa({{Rd = (int64_t)Mem;}}, {{64}}); + 0x18: ldswa({{Rd = (int32_t)Mem.sw;}}); + 0x19: ldsba({{Rd = (int8_t)Mem.sb;}}); + 0x1A: ldsha({{Rd = (int16_t)Mem.shw;}}); + 0x1B: ldxa({{Rd = (int64_t)Mem.sdw;}}); } - 0x1D: LoadStore::ldstuba({{ - Rd = Mem; - Mem = 0xFF; - }}, {{8}}); - 0x1E: Store::stxa({{Mem = Rd}}, {{64}}); - 0x1F: LoadStore::swapa({{ - uint32_t temp = Rd; - Rd = Mem; - Mem = temp; - }}, {{32}}); + 0x1D: LoadStore::ldstuba( + {{Rd = Mem.ub;}}, + {{Mem.ub = 0xFF}}); + 0x1E: Store::stxa({{Mem.udw = Rd}}); + 0x1F: LoadStore::swapa( + {{*temp = Rd.uw; + Rd.uw = Mem.uw;}}, + {{Mem.uw = *temp;}}); format Trap { - 0x20: Load::ldf({{Frd.uw = Mem;}}, {{32}}); + 0x20: Load::ldf({{Frd.uw = Mem.uw;}}); 0x21: decode X { - 0x0: Load::ldfsr({{Fsr = Mem<31:0> | Fsr<63:32>;}}, {{32}}); - 0x1: 
Load::ldxfsr({{Fsr = Mem;}}, {{64}}); + 0x0: Load::ldfsr({{Fsr = Mem.uw | Fsr<63:32>;}}); + 0x1: Load::ldxfsr({{Fsr = Mem.udw;}}); } 0x22: ldqf({{fault = new FpDisabled;}}); - 0x23: Load::lddf({{Frd.udw = Mem;}}, {{64}}); - 0x24: Store::stf({{Mem = Frd.uw;}}, {{32}}); + 0x23: Load::lddf({{Frd.udw = Mem.udw;}}); + 0x24: Store::stf({{Mem.uw = Frd.uw;}}); 0x25: decode X { - 0x0: Store::stfsr({{Mem = Fsr<31:0>;}}, {{32}}); - 0x1: Store::stxfsr({{Mem = Fsr;}}, {{64}}); + 0x0: Store::stfsr({{Mem.uw = Fsr<31:0>;}}); + 0x1: Store::stxfsr({{Mem.udw = Fsr;}}); } 0x26: stqf({{fault = new FpDisabled;}}); - 0x27: Store::stdf({{Mem = Frd.udw;}}, {{64}}); + 0x27: Store::stdf({{Mem.udw = Frd.udw;}}); 0x2D: Nop::prefetch({{ }}); - 0x30: Load::ldfa({{Frd.uw = Mem;}}, {{32}}); + 0x30: Load::ldfa({{Frd.uw = Mem.uw;}}); 0x32: ldqfa({{fault = new FpDisabled;}}); - 0x33: Load::lddfa({{Frd.udw = Mem;}}, {{64}}); - 0x34: Store::stfa({{Mem = Frd.uw;}}, {{32}}); + format LoadAlt { + 0x33: decode EXT_ASI { + //ASI_NUCLEUS + 0x04: FailUnimpl::lddfa_n(); + //ASI_NUCLEUS_LITTLE + 0x0C: FailUnimpl::lddfa_nl(); + //ASI_AS_IF_USER_PRIMARY + 0x10: FailUnimpl::lddfa_aiup(); + //ASI_AS_IF_USER_PRIMARY_LITTLE + 0x18: FailUnimpl::lddfa_aiupl(); + //ASI_AS_IF_USER_SECONDARY + 0x11: FailUnimpl::lddfa_aius(); + //ASI_AS_IF_USER_SECONDARY_LITTLE + 0x19: FailUnimpl::lddfa_aiusl(); + //ASI_REAL + 0x14: FailUnimpl::lddfa_real(); + //ASI_REAL_LITTLE + 0x1C: FailUnimpl::lddfa_real_l(); + //ASI_REAL_IO + 0x15: FailUnimpl::lddfa_real_io(); + //ASI_REAL_IO_LITTLE + 0x1D: FailUnimpl::lddfa_real_io_l(); + //ASI_PRIMARY + 0x80: FailUnimpl::lddfa_p(); + //ASI_PRIMARY_LITTLE + 0x88: FailUnimpl::lddfa_pl(); + //ASI_SECONDARY + 0x81: FailUnimpl::lddfa_s(); + //ASI_SECONDARY_LITTLE + 0x89: FailUnimpl::lddfa_sl(); + //ASI_PRIMARY_NO_FAULT + 0x82: FailUnimpl::lddfa_pnf(); + //ASI_PRIMARY_NO_FAULT_LITTLE + 0x8A: FailUnimpl::lddfa_pnfl(); + //ASI_SECONDARY_NO_FAULT + 0x83: FailUnimpl::lddfa_snf(); + 
//ASI_SECONDARY_NO_FAULT_LITTLE + 0x8B: FailUnimpl::lddfa_snfl(); + + format BlockLoad { + // LDBLOCKF + //ASI_BLOCK_AS_IF_USER_PRIMARY + 0x16: FailUnimpl::ldblockf_aiup(); + //ASI_BLOCK_AS_IF_USER_SECONDARY + 0x17: FailUnimpl::ldblockf_aius(); + //ASI_BLOCK_AS_IF_USER_PRIMARY_LITTLE + 0x1E: FailUnimpl::ldblockf_aiupl(); + //ASI_BLOCK_AS_IF_USER_SECONDARY_LITTLE + 0x1F: FailUnimpl::ldblockf_aiusl(); + //ASI_BLOCK_PRIMARY + 0xF0: ldblockf_p({{Frd_N.udw = Mem.udw;}}); + //ASI_BLOCK_SECONDARY + 0xF1: FailUnimpl::ldblockf_s(); + //ASI_BLOCK_PRIMARY_LITTLE + 0xF8: FailUnimpl::ldblockf_pl(); + //ASI_BLOCK_SECONDARY_LITTLE + 0xF9: FailUnimpl::ldblockf_sl(); + } + + //LDSHORTF + //ASI_FL8_PRIMARY + 0xD0: FailUnimpl::ldshortf_8p(); + //ASI_FL8_SECONDARY + 0xD1: FailUnimpl::ldshortf_8s(); + //ASI_FL8_PRIMARY_LITTLE + 0xD8: FailUnimpl::ldshortf_8pl(); + //ASI_FL8_SECONDARY_LITTLE + 0xD9: FailUnimpl::ldshortf_8sl(); + //ASI_FL16_PRIMARY + 0xD2: FailUnimpl::ldshortf_16p(); + //ASI_FL16_SECONDARY + 0xD3: FailUnimpl::ldshortf_16s(); + //ASI_FL16_PRIMARY_LITTLE + 0xDA: FailUnimpl::ldshortf_16pl(); + //ASI_FL16_SECONDARY_LITTLE + 0xDB: FailUnimpl::ldshortf_16sl(); + //Not an ASI which is legal with lddfa + default: Trap::lddfa_bad_asi( + {{fault = new DataAccessException;}}); + } + } + 0x34: Store::stfa({{Mem.uw = Frd.uw;}}); 0x36: stqfa({{fault = new FpDisabled;}}); - //XXX need to work in the ASI thing - 0x37: Store::stdfa({{Mem = Frd.udw;}}, {{64}}); + format StoreAlt { + 0x37: decode EXT_ASI { + //ASI_NUCLEUS + 0x04: FailUnimpl::stdfa_n(); + //ASI_NUCLEUS_LITTLE + 0x0C: FailUnimpl::stdfa_nl(); + //ASI_AS_IF_USER_PRIMARY + 0x10: FailUnimpl::stdfa_aiup(); + //ASI_AS_IF_USER_PRIMARY_LITTLE + 0x18: FailUnimpl::stdfa_aiupl(); + //ASI_AS_IF_USER_SECONDARY + 0x11: FailUnimpl::stdfa_aius(); + //ASI_AS_IF_USER_SECONDARY_LITTLE + 0x19: FailUnimpl::stdfa_aiusl(); + //ASI_REAL + 0x14: FailUnimpl::stdfa_real(); + //ASI_REAL_LITTLE + 0x1C: FailUnimpl::stdfa_real_l(); + //ASI_REAL_IO + 0x15: 
FailUnimpl::stdfa_real_io(); + //ASI_REAL_IO_LITTLE + 0x1D: FailUnimpl::stdfa_real_io_l(); + //ASI_PRIMARY + 0x80: FailUnimpl::stdfa_p(); + //ASI_PRIMARY_LITTLE + 0x88: FailUnimpl::stdfa_pl(); + //ASI_SECONDARY + 0x81: FailUnimpl::stdfa_s(); + //ASI_SECONDARY_LITTLE + 0x89: FailUnimpl::stdfa_sl(); + //ASI_PRIMARY_NO_FAULT + 0x82: FailUnimpl::stdfa_pnf(); + //ASI_PRIMARY_NO_FAULT_LITTLE + 0x8A: FailUnimpl::stdfa_pnfl(); + //ASI_SECONDARY_NO_FAULT + 0x83: FailUnimpl::stdfa_snf(); + //ASI_SECONDARY_NO_FAULT_LITTLE + 0x8B: FailUnimpl::stdfa_snfl(); + + format BlockStore { + // STBLOCKF + //ASI_BLOCK_AS_IF_USER_PRIMARY + 0x16: FailUnimpl::stblockf_aiup(); + //ASI_BLOCK_AS_IF_USER_SECONDARY + 0x17: FailUnimpl::stblockf_aius(); + //ASI_BLOCK_AS_IF_USER_PRIMARY_LITTLE + 0x1E: FailUnimpl::stblockf_aiupl(); + //ASI_BLOCK_AS_IF_USER_SECONDARY_LITTLE + 0x1F: FailUnimpl::stblockf_aiusl(); + //ASI_BLOCK_PRIMARY + 0xF0: stblockf_p({{Mem.udw = Frd_N.udw;}}); + //ASI_BLOCK_SECONDARY + 0xF1: FailUnimpl::stblockf_s(); + //ASI_BLOCK_PRIMARY_LITTLE + 0xF8: FailUnimpl::stblockf_pl(); + //ASI_BLOCK_SECONDARY_LITTLE + 0xF9: FailUnimpl::stblockf_sl(); + } + + //STSHORTF + //ASI_FL8_PRIMARY + 0xD0: FailUnimpl::stshortf_8p(); + //ASI_FL8_SECONDARY + 0xD1: FailUnimpl::stshortf_8s(); + //ASI_FL8_PRIMARY_LITTLE + 0xD8: FailUnimpl::stshortf_8pl(); + //ASI_FL8_SECONDARY_LITTLE + 0xD9: FailUnimpl::stshortf_8sl(); + //ASI_FL16_PRIMARY + 0xD2: FailUnimpl::stshortf_16p(); + //ASI_FL16_SECONDARY + 0xD3: FailUnimpl::stshortf_16s(); + //ASI_FL16_PRIMARY_LITTLE + 0xDA: FailUnimpl::stshortf_16pl(); + //ASI_FL16_SECONDARY_LITTLE + 0xDB: FailUnimpl::stshortf_16sl(); + //Not an ASI which is legal with lddfa + default: Trap::stdfa_bad_asi( + {{fault = new DataAccessException;}}); + } + } 0x3C: Cas::casa({{ uint64_t val = Mem.uw; if(Rs2.uw == val) diff --git a/src/arch/sparc/isa/formats.isa b/src/arch/sparc/isa/formats.isa deleted file mode 100644 index 17d68061b..000000000 --- a/src/arch/sparc/isa/formats.isa 
+++ /dev/null @@ -1,28 +0,0 @@ -//Include the basic format -//Templates from this format are used later -##include "formats/basic.isa" - -//Include the noop format -##include "formats/nop.isa" - -//Include the integerOp and integerOpCc format -##include "formats/integerop.isa" - -//Include the memory format -##include "formats/mem.isa" - -//Include the compare and swap format -##include "formats/cas.isa" - -//Include the trap format -##include "formats/trap.isa" - -//Include the "unknown" format -##include "formats/unknown.isa" - -//Include the priveleged mode format -##include "formats/priv.isa" - -//Include the branch format -##include "formats/branch.isa" - diff --git a/src/arch/sparc/isa/formats/basic.isa b/src/arch/sparc/isa/formats/basic.isa index 0a47a7ffe..a4c05387b 100644 --- a/src/arch/sparc/isa/formats/basic.isa +++ b/src/arch/sparc/isa/formats/basic.isa @@ -33,6 +33,14 @@ def template BasicExecDeclare {{ Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const; }}; +// Definitions of execute methods that panic. +def template BasicExecPanic {{ + Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const + { + panic("Execute method called when it shouldn't!"); + } +}}; + // Basic instruction class declaration template. def template BasicDeclare {{ /** @@ -42,14 +50,14 @@ def template BasicDeclare {{ { public: // Constructor. - %(class_name)s(MachInst machInst); + %(class_name)s(ExtMachInst machInst); %(BasicExecDeclare)s }; }}; // Basic instruction class constructor template. def template BasicConstructor {{ - inline %(class_name)s::%(class_name)s(MachInst machInst) + inline %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) { %(constructor)s; @@ -80,6 +88,11 @@ def template BasicDecode {{ return new %(class_name)s(machInst); }}; +// Basic decode template, passing mnemonic in as string arg to constructor. 
+def template BasicDecodeWithMnemonic {{ + return new %(class_name)s("%(mnemonic)s", machInst); +}}; + // The most basic instruction format... used only for a few misc. insts def format BasicOperate(code, *flags) {{ iop = InstObjParams(name, Name, 'SparcStaticInst', diff --git a/src/arch/sparc/isa/formats/branch.isa b/src/arch/sparc/isa/formats/branch.isa index 2c206354b..5fb7ade2d 100644 --- a/src/arch/sparc/isa/formats/branch.isa +++ b/src/arch/sparc/isa/formats/branch.isa @@ -80,7 +80,7 @@ output header {{ OpClass __opClass) : BranchDisp(mnem, _machInst, __opClass) { - disp = sign_ext(_machInst << 2, bits + 2); + disp = sext<bits + 2>((_machInst & mask(bits)) << 2); } }; @@ -95,7 +95,7 @@ output header {{ OpClass __opClass) : BranchDisp(mnem, _machInst, __opClass) { - disp = sign_ext((D16HI << 16) | (D16LO << 2), 18); + disp = sext<18>((D16HI << 16) | (D16LO << 2)); } }; @@ -108,7 +108,7 @@ output header {{ protected: // Constructor BranchImm13(const char *mnem, MachInst _machInst, OpClass __opClass) : - Branch(mnem, _machInst, __opClass), imm(sign_ext(SIMM13, 13)) + Branch(mnem, _machInst, __opClass), imm(sext<13>(SIMM13)) { } diff --git a/src/arch/sparc/isa/formats/formats.isa b/src/arch/sparc/isa/formats/formats.isa new file mode 100644 index 000000000..5b81a1ab1 --- /dev/null +++ b/src/arch/sparc/isa/formats/formats.isa @@ -0,0 +1,62 @@ +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +//Include the basic format +//Templates from this format are used later +##include "basic.isa" + +//Include base classes for microcoding instructions +##include "micro.isa" + +//Include the noop format +##include "nop.isa" + +//Include the integerOp and integerOpCc format +##include "integerop.isa" + +//Include the memory formats +##include "mem/mem.isa" + +//Include the compare and swap format +##include "cas.isa" + +//Include the trap format +##include "trap.isa" + +//Include the unimplemented format +##include "unimp.isa" + +//Include the "unknown" format +##include "unknown.isa" + +//Include the privileged mode format +##include "priv.isa" + +//Include the branch format +##include "branch.isa" + diff --git a/src/arch/sparc/isa/formats/integerop.isa b/src/arch/sparc/isa/formats/integerop.isa index 83c7e6958..4f8ebebcc 100644 --- a/src/arch/sparc/isa/formats/integerop.isa +++ b/src/arch/sparc/isa/formats/integerop.isa @@ -87,7 +87,7 @@ output header {{ OpClass __opClass) : IntOpImm(mnem, _machInst, __opClass) { - imm = sign_ext(SIMM10, 10); + imm = sext<10>(SIMM10); } }; @@ -102,7 +102,7 @@ output header {{ OpClass __opClass) : IntOpImm(mnem, _machInst, __opClass) { - imm = sign_ext(SIMM11, 11); + imm = sext<11>(SIMM11); } }; @@ -117,7 +117,7 @@ output header {{ OpClass __opClass) : IntOpImm(mnem, _machInst, __opClass) { - imm = sign_ext(SIMM13, 13); + imm = sext<13>(SIMM13); } }; @@ -264,13 +264,13 @@ let {{ (usesImm, code, immCode, rString, iString) = splitOutImm(code) iop = InstObjParams(name, Name, 'IntOp', code, - opt_flags, ("cc_code", ccCode)) + opt_flags, {"cc_code": ccCode}) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) exec_output = IntOpExecute.subst(iop) if usesImm: imm_iop = InstObjParams(name, Name + 'Imm', 'IntOpImm' + iString, - immCode, opt_flags, ("cc_code", ccCode)) + immCode, opt_flags, {"cc_code": ccCode}) header_output += BasicDeclare.subst(imm_iop) decoder_output += 
BasicConstructor.subst(imm_iop) exec_output += IntOpExecute.subst(imm_iop) @@ -341,7 +341,7 @@ def format IntOpCcRes(code, *opt_flags) {{ def format SetHi(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'SetHi', - code, opt_flags, ("cc_code", '')) + code, opt_flags, {"cc_code": ''}) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) exec_output = IntOpExecute.subst(iop) diff --git a/src/arch/sparc/isa/formats/mem.isa b/src/arch/sparc/isa/formats/mem/basicmem.isa index 9011c1fc6..c13194d0f 100644 --- a/src/arch/sparc/isa/formats/mem.isa +++ b/src/arch/sparc/isa/formats/mem/basicmem.isa @@ -1,3 +1,32 @@ +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Ali Saidi +// Gabe Black + //////////////////////////////////////////////////////////////////// // // Mem instructions @@ -30,15 +59,13 @@ output header {{ // Constructor MemImm(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : - Mem(mnem, _machInst, __opClass) - { - imm = sign_ext(SIMM13, 13); - } + Mem(mnem, _machInst, __opClass), imm(sext<13>(SIMM13)) + {} std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; - int32_t imm; + const int32_t imm; }; }}; @@ -99,73 +126,69 @@ output decoder {{ } }}; -def template MemExecute {{ - Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, - Trace::InstRecord *traceData) const +def template MemDeclare {{ + /** + * Static instruction class for "%(mnemonic)s". + */ + class %(class_name)s : public %(base_class)s { - Fault fault = NoFault; - Addr EA; - %(op_decl)s; - %(op_rd)s; - %(ea_code)s; - DPRINTF(Sparc, "The address is 0x%x\n", EA); - %(load)s; - %(code)s; - - if(fault == NoFault) - { - %(store)s; - //Write the resulting state to the execution context - %(op_wb)s; - } + public: - return fault; - } + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst); + + %(BasicExecDeclare)s + + %(InitiateAccDeclare)s + + %(CompleteAccDeclare)s + }; }}; let {{ - # Leave memAccessFlags at 0 for now - loadString = "xc->read(EA, (uint%(width)s_t&)Mem, 0);" - storeString = "uint64_t write_result = 0; \ - xc->write((uint%(width)s_t)Mem, EA, 0, &write_result);" - - def doMemFormat(code, load, store, name, Name, opt_flags): + def doMemFormat(code, execute, faultCode, name, Name, opt_flags): addrCalcReg = 'EA = Rs1 + Rs2;' addrCalcImm = 'EA = Rs1 + imm;' iop = InstObjParams(name, Name, 'Mem', code, - opt_flags, ("ea_code", addrCalcReg), - ("load", load), ("store", store)) - iop_imm = InstObjParams(name, Name + 'Imm', 'MemImm', code, - opt_flags, ("ea_code", addrCalcImm), - ("load", load), ("store", store)) - header_output = BasicDeclare.subst(iop) + BasicDeclare.subst(iop_imm) + opt_flags, {"fault_check": faultCode, "ea_code": addrCalcReg}) + iop_imm = InstObjParams(name, Name + "Imm", 'MemImm', code, + opt_flags, {"fault_check": faultCode, "ea_code": addrCalcImm}) + header_output = MemDeclare.subst(iop) + MemDeclare.subst(iop_imm) decoder_output = BasicConstructor.subst(iop) + BasicConstructor.subst(iop_imm) decode_block = ROrImmDecode.subst(iop) - exec_output = MemExecute.subst(iop) + MemExecute.subst(iop_imm) + exec_output = doSplitExecute(code, addrCalcReg, addrCalcImm, execute, + faultCode, name, name + "Imm", Name, Name + "Imm", opt_flags) return (header_output, decoder_output, exec_output, decode_block) }}; -def format Load(code, width, *opt_flags) {{ +def format LoadAlt(code, *opt_flags) {{ (header_output, decoder_output, exec_output, - decode_block) = doMemFormat(code, - loadString % {"width":width}, '', name, Name, opt_flags) + decode_block) = doMemFormat(code, LoadExecute, + AlternateAsiPrivFaultCheck, name, Name, opt_flags) }}; -def format Store(code, width, *opt_flags) {{ +def format StoreAlt(code, *opt_flags) {{ (header_output, decoder_output, exec_output, - decode_block) = 
doMemFormat(code, '', - storeString % {"width":width}, name, Name, opt_flags) + decode_block) = doMemFormat(code, StoreExecute, + AlternateAsiPrivFaultCheck, name, Name, opt_flags) +}}; + +def format Load(code, *opt_flags) {{ + (header_output, + decoder_output, + exec_output, + decode_block) = doMemFormat(code, + LoadExecute, '', name, Name, opt_flags) }}; -def format LoadStore(code, width, *opt_flags) {{ +def format Store(code, *opt_flags) {{ (header_output, decoder_output, exec_output, decode_block) = doMemFormat(code, - loadString % {"width":width}, storeString % {"width":width}, - name, Name, opt_flags) + StoreExecute, '', name, Name, opt_flags) }}; diff --git a/src/arch/sparc/isa/formats/mem/blockmem.isa b/src/arch/sparc/isa/formats/mem/blockmem.isa new file mode 100644 index 000000000..93ad1b2b8 --- /dev/null +++ b/src/arch/sparc/isa/formats/mem/blockmem.isa @@ -0,0 +1,337 @@ +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Ali Saidi +// Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Block Memory instructions +// + +output header {{ + + class BlockMem : public SparcMacroInst + { + protected: + + // Constructor + // We make the assumption that all block memory operations + // Will take 8 instructions to execute + BlockMem(const char *mnem, ExtMachInst _machInst) : + SparcMacroInst(mnem, _machInst, No_OpClass, 8) + {} + }; + + class BlockMemImm : public BlockMem + { + protected: + + // Constructor + BlockMemImm(const char *mnem, ExtMachInst _machInst) : + BlockMem(mnem, _machInst) + {} + }; + + class BlockMemMicro : public SparcDelayedMicroInst + { + protected: + + // Constructor + BlockMemMicro(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int8_t _offset) : + SparcDelayedMicroInst(mnem, _machInst, __opClass), + offset(_offset) + {} + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + + const int8_t offset; + }; + + class BlockMemImmMicro : public BlockMemMicro + { + protected: + + // Constructor + BlockMemImmMicro(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int8_t _offset) : + BlockMemMicro(mnem, _machInst, __opClass, _offset), + imm(sext<13>(SIMM13)) + {} + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + + const int32_t imm; + }; +}}; + +output decoder {{ + std::string 
BlockMemMicro::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream response; + bool load = flags[IsLoad]; + bool save = flags[IsStore]; + + printMnemonic(response, mnemonic); + if(save) + { + printReg(response, _srcRegIdx[0]); + ccprintf(response, ", "); + } + ccprintf(response, "[ "); + printReg(response, _srcRegIdx[!save ? 0 : 1]); + ccprintf(response, " + "); + printReg(response, _srcRegIdx[!save ? 1 : 2]); + ccprintf(response, " ]"); + if(load) + { + ccprintf(response, ", "); + printReg(response, _destRegIdx[0]); + } + + return response.str(); + } + + std::string BlockMemImmMicro::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream response; + bool load = flags[IsLoad]; + bool save = flags[IsStore]; + + printMnemonic(response, mnemonic); + if(save) + { + printReg(response, _srcRegIdx[1]); + ccprintf(response, ", "); + } + ccprintf(response, "[ "); + printReg(response, _srcRegIdx[0]); + if(imm >= 0) + ccprintf(response, " + 0x%x ]", imm); + else + ccprintf(response, " + -0x%x ]", -imm); + if(load) + { + ccprintf(response, ", "); + printReg(response, _destRegIdx[0]); + } + + return response.str(); + } + +}}; + +def template BlockMemDeclare {{ + /** + * Static instruction class for a block memory operation + */ + class %(class_name)s : public %(base_class)s + { + public: + //Constructor + %(class_name)s(ExtMachInst machInst); + + protected: + class %(class_name)s_0 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_0(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_1 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_1(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_2 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_2(ExtMachInst machInst); + %(BasicExecDeclare)s + 
%(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_3 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_3(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_4 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_4(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_5 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_5(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_6 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_6(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + + class %(class_name)s_7 : public %(base_class)sMicro + { + public: + //Constructor + %(class_name)s_7(ExtMachInst machInst); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; + }; +}}; + +// Basic instruction class constructor template. 
+def template BlockMemConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst) + : %(base_class)s("%(mnemonic)s", machInst) + { + %(constructor)s; + microOps[0] = new %(class_name)s_0(machInst); + microOps[1] = new %(class_name)s_1(machInst); + microOps[2] = new %(class_name)s_2(machInst); + microOps[3] = new %(class_name)s_3(machInst); + microOps[4] = new %(class_name)s_4(machInst); + microOps[5] = new %(class_name)s_5(machInst); + microOps[6] = new %(class_name)s_6(machInst); + microOps[7] = new %(class_name)s_7(machInst); + } +}}; + +def template BlockMemMicroConstructor {{ + inline %(class_name)s:: + %(class_name)s_%(micro_pc)s:: + %(class_name)s_%(micro_pc)s(ExtMachInst machInst) : + %(base_class)sMicro("%(mnemonic)s[%(micro_pc)s]", + machInst, %(op_class)s, %(micro_pc)s * 8) + { + %(constructor)s; + %(set_flags)s; + } +}}; + +let {{ + + def doBlockMemFormat(code, faultCode, execute, name, Name, opt_flags): + # XXX Need to take care of pstate.hpriv as well. The lower ASIs + # are split into ones that are available in priv and hpriv, and + # those that are only available in hpriv + addrCalcReg = 'EA = Rs1 + Rs2 + offset;' + addrCalcImm = 'EA = Rs1 + imm + offset;' + iop = InstObjParams(name, Name, 'BlockMem', code, opt_flags) + iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm', code, opt_flags) + header_output = BlockMemDeclare.subst(iop) + BlockMemDeclare.subst(iop_imm) + decoder_output = BlockMemConstructor.subst(iop) + BlockMemConstructor.subst(iop_imm) + decode_block = ROrImmDecode.subst(iop) + matcher = re.compile(r'Frd_N') + exec_output = '' + for microPc in range(8): + flag_code = '' + if (microPc == 7): + flag_code = "flags[IsLastMicroOp] = true;" + pcedCode = matcher.sub("Frd_%d" % microPc, code) + iop = InstObjParams(name, Name, 'BlockMem', pcedCode, + opt_flags, {"ea_code": addrCalcReg, + "fault_check": faultCode, "micro_pc": microPc, + "set_flags": flag_code}) + iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm', 
pcedCode, + opt_flags, {"ea_code": addrCalcImm, + "fault_check": faultCode, "micro_pc": microPc, + "set_flags": flag_code}) + decoder_output += BlockMemMicroConstructor.subst(iop) + decoder_output += BlockMemMicroConstructor.subst(iop_imm) + exec_output += doSplitExecute( + pcedCode, addrCalcReg, addrCalcImm, execute, faultCode, + makeMicroName(name, microPc), + makeMicroName(name + "Imm", microPc), + makeMicroName(Name, microPc), + makeMicroName(Name + "Imm", microPc), + opt_flags); + faultCode = '' + return (header_output, decoder_output, exec_output, decode_block) +}}; + +def format BlockLoad(code, *opt_flags) {{ + # We need to make sure to check the highest priority fault last. + # That way, if other faults have been detected, they'll be overwritten + # rather than the other way around. + faultCode = AlternateASIPrivFaultCheck + BlockAlignmentFaultCheck + (header_output, + decoder_output, + exec_output, + decode_block) = doBlockMemFormat(code, faultCode, + LoadExecute, name, Name, opt_flags) +}}; + +def format BlockStore(code, *opt_flags) {{ + # We need to make sure to check the highest priority fault last. + # That way, if other faults have been detected, they'll be overwritten + # rather than the other way around. + faultCode = AlternateASIPrivFaultCheck + BlockAlignmentFaultCheck + (header_output, + decoder_output, + exec_output, + decode_block) = doBlockMemFormat(code, faultCode, + StoreExecute, name, Name, opt_flags) +}}; diff --git a/src/arch/sparc/isa/formats/mem/mem.isa b/src/arch/sparc/isa/formats/mem/mem.isa new file mode 100644 index 000000000..20a22c45d --- /dev/null +++ b/src/arch/sparc/isa/formats/mem/mem.isa @@ -0,0 +1,45 @@ +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Ali Saidi +// Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Mem formats +// + +//Include mem utility templates and functions +##include "util.isa" + +//Include the basic memory format +##include "basicmem.isa" + +//Include the block memory format +##include "blockmem.isa" + +//Include the load/store memory format +##include "loadstore.isa" diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa new file mode 100644 index 000000000..241a25d17 --- /dev/null +++ b/src/arch/sparc/isa/formats/mem/util.isa @@ -0,0 +1,226 @@ +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Ali Saidi +// Gabe Black +// Steve Reinhardt + +//////////////////////////////////////////////////////////////////// +// +// Mem utility templates and functions +// + +//This template provides the execute functions for a load +def template LoadExecute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + DPRINTF(Sparc, "The address is 0x%x\n", EA); + %(fault_check)s; + if(fault == NoFault) + { + fault = xc->read(EA, (uint%(mem_acc_size)s_t&)Mem, 0); + } + if(fault == NoFault) + { + %(code)s; + } + if(fault == NoFault) + { + //Write the resulting state to the execution context + %(op_wb)s; + } + + return fault; + } + + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + Addr EA; + uint%(mem_acc_size)s_t Mem; + %(ea_decl)s; + %(ea_rd)s; + %(ea_code)s; + %(fault_check)s; + if(fault == NoFault) + { + fault = xc->read(EA, (uint%(mem_acc_size)s_t&)Mem, 0); + } + return fault; + } + + Fault %(class_name)s::completeAcc(PacketPtr pkt, %(CPU_exec_context)s * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + %(code_decl)s; + %(code_rd)s; + Mem = pkt->get<typeof(Mem)>(); + %(code)s; + if(fault == NoFault) + { + %(code_wb)s; + } + return fault; + } +}}; + +//This template provides the execute 
functions for a store +def template StoreExecute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + uint64_t write_result = 0; + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + DPRINTF(Sparc, "The address is 0x%x\n", EA); + %(fault_check)s; + if(fault == NoFault) + { + %(code)s; + } + if(fault == NoFault) + { + fault = xc->write((uint%(mem_acc_size)s_t)Mem, EA, 0, &write_result); + } + if(fault == NoFault) + { + //Write the resulting state to the execution context + %(op_wb)s; + } + + return fault; + } + + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + uint64_t write_result = 0; + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + DPRINTF(Sparc, "The address is 0x%x\n", EA); + %(fault_check)s; + if(fault == NoFault) + { + %(code)s; + } + if(fault == NoFault) + { + fault = xc->write((uint%(mem_acc_size)s_t)Mem, EA, 0, &write_result); + } + if(fault == NoFault) + { + //Write the resulting state to the execution context + %(op_wb)s; + } + return fault; + } + + Fault %(class_name)s::completeAcc(PacketPtr, %(CPU_exec_context)s * xc, + Trace::InstRecord * traceData) const + { + return NoFault; + } +}}; + +//This declares the initiateAcc function in memory operations +def template InitiateAccDeclare {{ + Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const; +}}; + +//This declares the completeAcc function in memory operations +def template CompleteAccDeclare {{ + Fault completeAcc(PacketPtr, %(CPU_exec_context)s *, Trace::InstRecord *) const; +}}; + +//Here are some code snippets which check for various fault conditions +let {{ + # The LSB can be zero, since it's really the MSB in doubles and quads + # and we're dealing with doubles + BlockAlignmentFaultCheck = ''' + if(RD & 0xe) + fault = new IllegalInstruction; + else if(EA & 0x3f) + fault = new MemAddressNotAligned; + ''' + # XXX Need 
to take care of pstate.hpriv as well. The lower ASIs + # are split into ones that are available in priv and hpriv, and + # those that are only available in hpriv + AlternateASIPrivFaultCheck = ''' + if(bits(Pstate,2,2) == 0 && (EXT_ASI & 0x80) == 0) + fault = new PrivilegedAction; + else if(AsiIsAsIfUser((ASI)EXT_ASI) && !bits(Pstate,2,2)) + fault = new PrivilegedAction; + ''' + +}}; + +//A simple function to generate the name of the micro op of a certain +//instruction at a certain micropc +let {{ + def makeMicroName(name, microPc): + return name + "::" + name + "_" + str(microPc) +}}; + +//This function properly generates the execute functions for one of the +//templates above. This is needed because in one case, ea computation, +//fault checks and the actual code all occur in the same function, +//and in the other they're distributed across two. Also note that for +//execute functions, the name of the base class doesn't matter. +let {{ + def doSplitExecute(code, eaRegCode, eaImmCode, execute, + faultCode, nameReg, nameImm, NameReg, NameImm, opt_flags): + codeIop = InstObjParams(nameReg, NameReg, '', code, opt_flags) + executeCode = '' + for (eaCode, name, Name) in ( + (eaRegCode, nameReg, NameReg), + (eaImmCode, nameImm, NameImm)): + eaIop = InstObjParams(name, Name, '', eaCode, + opt_flags, {"fault_check": faultCode}) + iop = InstObjParams(name, Name, '', code, opt_flags, + {"fault_check": faultCode, "ea_code" : eaCode}) + (iop.ea_decl, + iop.ea_rd, + iop.ea_wb) = (eaIop.op_decl, eaIop.op_rd, eaIop.op_wb) + (iop.code_decl, + iop.code_rd, + iop.code_wb) = (codeIop.op_decl, codeIop.op_rd, codeIop.op_wb) + executeCode += execute.subst(iop) + return executeCode +}}; diff --git a/src/arch/sparc/isa/formats/micro.isa b/src/arch/sparc/isa/formats/micro.isa new file mode 100644 index 000000000..82d7fb4cb --- /dev/null +++ b/src/arch/sparc/isa/formats/micro.isa @@ -0,0 +1,103 @@ +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +output header {{ + + class SparcMacroInst : public SparcStaticInst + { + protected: + const uint32_t numMicroOps; + + //Constructor. 
+ SparcMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint32_t _numMicroOps) + : SparcStaticInst(mnem, _machInst, __opClass), + numMicroOps(_numMicroOps) + { + assert(numMicroOps); + microOps = new StaticInstPtr[numMicroOps]; + flags[IsMacroOp] = true; + } + + ~SparcMacroInst() + { + delete [] microOps; + } + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + + StaticInstPtr * microOps; + + StaticInstPtr fetchMicroOp(MicroPC microPC) + { + assert(microPC < numMicroOps); + return microOps[microPC]; + } + + %(BasicExecPanic)s + }; + + class SparcMicroInst : public SparcStaticInst + { + protected: + //Constructor. + SparcMicroInst(const char *mnem, + ExtMachInst _machInst, OpClass __opClass) + : SparcStaticInst(mnem, _machInst, __opClass) + { + flags[IsMicroOp] = true; + } + }; + + class SparcDelayedMicroInst : public SparcMicroInst + { + protected: + //Constructor. + SparcDelayedMicroInst(const char *mnem, + ExtMachInst _machInst, OpClass __opClass) + : SparcMicroInst(mnem, _machInst, __opClass) + { + flags[IsDelayedCommit] = true; + } + }; +}}; + +output decoder {{ + + std::string SparcMacroInst::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, mnemonic); + + return response.str(); + } + +}}; diff --git a/src/arch/sparc/isa/formats/priv.isa b/src/arch/sparc/isa/formats/priv.isa index d7ee01519..2a38422a7 100644 --- a/src/arch/sparc/isa/formats/priv.isa +++ b/src/arch/sparc/isa/formats/priv.isa @@ -103,13 +103,13 @@ let {{ (usesImm, code, immCode, rString, iString) = splitOutImm(code) iop = InstObjParams(name, Name, 'Priv', code, - opt_flags, ("check", checkCode)) + opt_flags, {"check": checkCode}) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) exec_output = PrivExecute.subst(iop) if usesImm: imm_iop = InstObjParams(name, Name + 'Imm', 'PrivImm', - immCode, opt_flags, ("check", checkCode)) + immCode, 
opt_flags, {"check": checkCode}) header_output += BasicDeclare.subst(imm_iop) decoder_output += BasicConstructor.subst(imm_iop) exec_output += PrivExecute.subst(imm_iop) diff --git a/src/arch/sparc/isa/formats/unimp.isa b/src/arch/sparc/isa/formats/unimp.isa new file mode 100644 index 000000000..a623507a1 --- /dev/null +++ b/src/arch/sparc/isa/formats/unimp.isa @@ -0,0 +1,147 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2003-2005 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Steve Reinhardt + +//////////////////////////////////////////////////////////////////// +// +// Unimplemented instructions +// + +output header {{ + /** + * Static instruction class for unimplemented instructions that + * cause simulator termination. Note that these are recognized + * (legal) instructions that the simulator does not support; the + * 'Unknown' class is used for unrecognized/illegal instructions. + * This is a leaf class. + */ + class FailUnimplemented : public SparcStaticInst + { + public: + /// Constructor + FailUnimplemented(const char *_mnemonic, ExtMachInst _machInst) + : SparcStaticInst(_mnemonic, _machInst, No_OpClass) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + + %(BasicExecDeclare)s + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; + + /** + * Base class for unimplemented instructions that cause a warning + * to be printed (but do not terminate simulation). This + * implementation is a little screwy in that it will print a + * warning for each instance of a particular unimplemented machine + * instruction, not just for each unimplemented opcode. Should + * probably make the 'warned' flag a static member of the derived + * class. + */ + class WarnUnimplemented : public SparcStaticInst + { + private: + /// Have we warned on this instruction yet? 
+ mutable bool warned; + + public: + /// Constructor + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst) + : SparcStaticInst(_mnemonic, _machInst, No_OpClass), warned(false) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + + %(BasicExecDeclare)s + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + std::string + FailUnimplemented::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + return csprintf("%-10s (unimplemented)", mnemonic); + } + + std::string + WarnUnimplemented::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { +#ifdef SS_COMPATIBLE_DISASSEMBLY + return csprintf("%-10s", mnemonic); +#else + return csprintf("%-10s (unimplemented)", mnemonic); +#endif + } +}}; + +output exec {{ + Fault + FailUnimplemented::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + panic("attempt to execute unimplemented instruction '%s' " + "(inst 0x%08x)", mnemonic, machInst); + return NoFault; + } + + Fault + WarnUnimplemented::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + if (!warned) { + warn("instruction '%s' unimplemented\n", mnemonic); + warned = true; + } + + return NoFault; + } +}}; + + +def format FailUnimpl() {{ + iop = InstObjParams(name, 'FailUnimplemented') + decode_block = BasicDecodeWithMnemonic.subst(iop) +}}; + +def format WarnUnimpl() {{ + iop = InstObjParams(name, 'WarnUnimplemented') + decode_block = BasicDecodeWithMnemonic.subst(iop) +}}; + diff --git a/src/arch/sparc/isa/includes.isa b/src/arch/sparc/isa/includes.isa index f1c2bee96..a324756ec 100644 --- a/src/arch/sparc/isa/includes.isa +++ b/src/arch/sparc/isa/includes.isa @@ -40,6 +40,7 @@ output header {{ #include "cpu/static_inst.hh" #include "arch/sparc/faults.hh" #include "mem/request.hh" // some constructors use MemReq flags +#include "mem/packet.hh" #include 
"arch/sparc/isa_traits.hh" #include "arch/sparc/regfile.hh" }}; @@ -48,6 +49,7 @@ output decoder {{ #include "base/cprintf.hh" #include "base/loader/symtab.hh" #include "cpu/thread_context.hh" // for Jump::branchTarget() +#include "mem/packet.hh" #if defined(linux) #include <fenv.h> @@ -65,6 +67,8 @@ output exec {{ #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "sim/sim_exit.hh" +#include "mem/packet.hh" +#include "mem/packet_access.hh" using namespace SparcISA; }}; diff --git a/src/arch/sparc/isa/main.isa b/src/arch/sparc/isa/main.isa index 14acf54fa..df5ad0c99 100644 --- a/src/arch/sparc/isa/main.isa +++ b/src/arch/sparc/isa/main.isa @@ -26,7 +26,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: Korey Sewell +// Authors: Gabe Black //////////////////////////////////////////////////////////////////// // @@ -55,7 +55,7 @@ namespace SparcISA; ##include "base.isa" //Include the definitions for the instruction formats -##include "formats.isa" +##include "formats/formats.isa" //Include the decoder definition ##include "decoder.isa" diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa index 605816083..b8b75170b 100644 --- a/src/arch/sparc/isa/operands.isa +++ b/src/arch/sparc/isa/operands.isa @@ -42,6 +42,16 @@ def operand_types {{ 'qf' : ('float', 128) }}; +output header {{ + // A function to "decompress" double and quad floating point + // register numbers stuffed into 5 bit fields. These have their + // MSB put in the LSB position but are otherwise normal. + static inline unsigned int dfpr(unsigned int regNum) + { + return (regNum & (~1)) | ((regNum & 1) << 5); + } +}}; + def operands {{ # Int regs default to unsigned, but code should not count on this. 
# For clarity, descriptions that depend on unsigned behavior should @@ -51,10 +61,22 @@ def operands {{ 'RdHigh': ('IntReg', 'udw', 'RD | 1', 'IsInteger', 3), 'Rs1': ('IntReg', 'udw', 'RS1', 'IsInteger', 4), 'Rs2': ('IntReg', 'udw', 'RS2', 'IsInteger', 5), - 'Frd': ('FloatReg', 'df', 'RD', 'IsFloating', 10), - 'Frs1': ('FloatReg', 'df', 'RS1', 'IsFloating', 11), - 'Frs2': ('FloatReg', 'df', 'RS2', 'IsFloating', 12), - 'Mem': ('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 20), + 'Frds': ('FloatReg', 'sf', 'RD', 'IsFloating', 10), + 'Frd': ('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10), + # Each Frd_N refers to the Nth double precision register from Frd. + # Note that this adds twice N to the register number. + 'Frd_0': ('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10), + 'Frd_1': ('FloatReg', 'df', 'dfpr(RD) + 2', 'IsFloating', 10), + 'Frd_2': ('FloatReg', 'df', 'dfpr(RD) + 4', 'IsFloating', 10), + 'Frd_3': ('FloatReg', 'df', 'dfpr(RD) + 6', 'IsFloating', 10), + 'Frd_4': ('FloatReg', 'df', 'dfpr(RD) + 8', 'IsFloating', 10), + 'Frd_5': ('FloatReg', 'df', 'dfpr(RD) + 10', 'IsFloating', 10), + 'Frd_6': ('FloatReg', 'df', 'dfpr(RD) + 12', 'IsFloating', 10), + 'Frd_7': ('FloatReg', 'df', 'dfpr(RD) + 14', 'IsFloating', 10), + 'Frs1s': ('FloatReg', 'df', 'RS1', 'IsFloating', 11), + 'Frs1': ('FloatReg', 'df', 'dfpr(RS1)', 'IsFloating', 11), + 'Frs2s': ('FloatReg', 'df', 'RS2', 'IsFloating', 12), + 'Frs2': ('FloatReg', 'df', 'dfpr(RS2)', 'IsFloating', 12), 'NPC': ('NPC', 'udw', None, ( None, None, 'IsControl' ), 31), 'NNPC': ('NNPC', 'udw', None, (None, None, 'IsControl' ), 32), #'Runiq': ('ControlReg', 'uq', 'Uniq', None, 1), @@ -84,6 +106,8 @@ def operands {{ 'Gl': ('ControlReg', 'udw', 'MISCREG_GL', None, 54), 'Fsr': ('ControlReg', 'udw', 'MISCREG_FSR', None, 55), - 'Gsr': ('ControlReg', 'udw', 'MISCREG_GSR', None, 56) + 'Gsr': ('ControlReg', 'udw', 'MISCREG_GSR', None, 56), + # Mem gets a large number so it's always last + 'Mem': ('Mem', 'udw', None, 
('IsMemRef', 'IsLoad', 'IsStore'), 100) }}; diff --git a/src/arch/sparc/utility.hh b/src/arch/sparc/utility.hh index f1c071148..23fddf0e9 100644 --- a/src/arch/sparc/utility.hh +++ b/src/arch/sparc/utility.hh @@ -33,12 +33,25 @@ #include "arch/sparc/isa_traits.hh" #include "base/misc.hh" +#include "base/bitfield.hh" +#include "cpu/thread_context.hh" namespace SparcISA { inline ExtMachInst - makeExtMI(MachInst inst, const Addr &pc) { - return ExtMachInst(inst); + makeExtMI(MachInst inst, ThreadContext * xc) { + ExtMachInst emi = (unsigned MachInst) inst; + //The I bit, bit 13, is used to figure out where the ASI + //should come from. Use that in the ExtMachInst. This is + //slightly redundant, but it removes the need to put a condition + //into all the execute functions + if(inst & (1 << 13)) + emi |= (static_cast<ExtMachInst>(xc->readMiscReg(MISCREG_ASI)) + << (sizeof(MachInst) * 8)); + else + emi |= (static_cast<ExtMachInst>(bits(inst, 12, 5)) + << (sizeof(MachInst) * 8)); + return emi; } inline bool isCallerSaveIntegerRegister(unsigned int reg) { diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh index f4e49aa8d..879780d56 100644 --- a/src/base/bitfield.hh +++ b/src/base/bitfield.hh @@ -32,7 +32,7 @@ #ifndef __BASE_BITFIELD_HH__ #define __BASE_BITFIELD_HH__ -#include "sim/host.hh" +#include <inttypes.h> /** * Generate a 64-bit mask of 'nbits' 1s, right justified. 
diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 640e7e165..92735aa5f 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -134,6 +134,7 @@ baseFlags = [ 'PciConfigAll', 'Pipeline', 'Printf', + 'Quiesce', 'ROB', 'Regs', 'Rename', diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index ba34c5cfc..36c7349e6 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -200,7 +200,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) validateInst(inst); curStaticInst = StaticInst::decode(makeExtMI(machInst, - thread->readPC())); + thread->getTC())); #if FULL_SYSTEM thread->setInst(machInst); diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc index 8b1e60aea..9d85311bb 100644 --- a/src/cpu/exetrace.cc +++ b/src/cpu/exetrace.cc @@ -60,61 +60,66 @@ Trace::InstRecord::dump(ostream &outs) if (flags[PRINT_REG_DELTA]) { #if THE_ISA == SPARC_ISA - static uint64_t regs[32] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - static uint64_t ccr = 0; - static uint64_t y = 0; - static uint64_t floats[32]; - uint64_t newVal; - static const char * prefixes[4] = {"G", "O", "L", "I"}; - - char buf[256]; - sprintf(buf, "PC = 0x%016llx", thread->readNextPC()); - outs << buf; - sprintf(buf, " NPC = 0x%016llx", thread->readNextNPC()); - outs << buf; - newVal = thread->readMiscReg(SparcISA::MISCREG_CCR); - if(newVal != ccr) + //Don't print what happens for each micro-op, just print out + //once at the last op, and for regular instructions. 
+ if(!staticInst->isMicroOp() || staticInst->isLastMicroOp()) { - sprintf(buf, " CCR = 0x%016llx", newVal); + static uint64_t regs[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + static uint64_t ccr = 0; + static uint64_t y = 0; + static uint64_t floats[32]; + uint64_t newVal; + static const char * prefixes[4] = {"G", "O", "L", "I"}; + + char buf[256]; + sprintf(buf, "PC = 0x%016llx", thread->readNextPC()); outs << buf; - ccr = newVal; - } - newVal = thread->readMiscReg(SparcISA::MISCREG_Y); - if(newVal != y) - { - sprintf(buf, " Y = 0x%016llx", newVal); + sprintf(buf, " NPC = 0x%016llx", thread->readNextNPC()); outs << buf; - y = newVal; - } - for(int y = 0; y < 4; y++) - { - for(int x = 0; x < 8; x++) + newVal = thread->readMiscReg(SparcISA::MISCREG_CCR); + if(newVal != ccr) + { + sprintf(buf, " CCR = 0x%016llx", newVal); + outs << buf; + ccr = newVal; + } + newVal = thread->readMiscReg(SparcISA::MISCREG_Y); + if(newVal != y) + { + sprintf(buf, " Y = 0x%016llx", newVal); + outs << buf; + y = newVal; + } + for(int y = 0; y < 4; y++) { - int index = x + 8 * y; - newVal = thread->readIntReg(index); - if(regs[index] != newVal) + for(int x = 0; x < 8; x++) { - sprintf(buf, " %s%d = 0x%016llx", prefixes[y], x, newVal); - outs << buf; - regs[index] = newVal; + int index = x + 8 * y; + newVal = thread->readIntReg(index); + if(regs[index] != newVal) + { + sprintf(buf, " %s%d = 0x%016llx", prefixes[y], x, newVal); + outs << buf; + regs[index] = newVal; + } } } - } - for(int y = 0; y < 32; y++) - { - newVal = thread->readFloatRegBits(2 * y, 64); - if(floats[y] != newVal) + for(int y = 0; y < 32; y++) { - sprintf(buf, " F%d = 0x%016llx", y, newVal); - outs << buf; - floats[y] = newVal; + newVal = thread->readFloatRegBits(2 * y, 64); + if(floats[y] != newVal) + { + sprintf(buf, " F%d = 0x%016llx", 2 * y, newVal); + outs << buf; + floats[y] = newVal; + } } + outs << endl; } - outs << endl; #endif } else if 
(flags[INTEL_FORMAT]) { diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 1e0d07f9a..91e073cf0 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -113,7 +113,7 @@ MemTest::MemTest(const string &name, // PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, -// unsigned _percentCopies, + unsigned _percentFunctional, unsigned _percentUncacheable, unsigned _progressInterval, unsigned _percentSourceUnaligned, @@ -130,7 +130,7 @@ MemTest::MemTest(const string &name, // checkMem(check_mem), size(_memorySize), percentReads(_percentReads), -// percentCopies(_percentCopies), + percentFunctional(_percentFunctional), percentUncacheable(_percentUncacheable), progressInterval(_progressInterval), nextProgressMessage(_progressInterval), @@ -345,7 +345,7 @@ MemTest::tick() } else { paddr = ((base) ? baseAddr1 : baseAddr2) + offset; } - bool probe = (random() % 2 == 1) && !(flags & UNCACHEABLE); + bool probe = (random() % 100 < percentFunctional) && !(flags & UNCACHEABLE); //bool probe = false; paddr &= ~((1 << access_size) - 1); @@ -501,7 +501,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) // SimObjectParam<PhysicalMemory *> check_mem; Param<unsigned> memory_size; Param<unsigned> percent_reads; -// Param<unsigned> percent_copies; + Param<unsigned> percent_functional; Param<unsigned> percent_uncacheable; Param<unsigned> progress_interval; Param<unsigned> percent_source_unaligned; @@ -520,7 +520,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) // INIT_PARAM(check_mem, "check memory"), INIT_PARAM(memory_size, "memory size"), INIT_PARAM(percent_reads, "target read percentage"), -// INIT_PARAM(percent_copies, "target copy percentage"), + INIT_PARAM(percent_functional, "percentage of access that are functional"), INIT_PARAM(percent_uncacheable, "target uncacheable percentage"), INIT_PARAM(progress_interval, "progress report interval (in accesses)"), INIT_PARAM(percent_source_unaligned, @@ -537,7 +537,7 @@ 
END_INIT_SIM_OBJECT_PARAMS(MemTest) CREATE_SIM_OBJECT(MemTest) { return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/ - /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/ + /*check_mem,*/ memory_size, percent_reads, percent_functional, percent_uncacheable, progress_interval, percent_source_unaligned, percent_dest_unaligned, trace_addr, max_loads, atomic); diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 4e88ecffd..edde4a3b2 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -55,7 +55,7 @@ class MemTest : public MemObject // PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, -// unsigned _percentCopies, + unsigned _percentFunctional, unsigned _percentUncacheable, unsigned _progressInterval, unsigned _percentSourceUnaligned, @@ -144,7 +144,7 @@ class MemTest : public MemObject unsigned size; // size of testing memory region unsigned percentReads; // target percentage of read accesses -// unsigned percentCopies; // target percentage of copy accesses + unsigned percentFunctional; // target percentage of functional accesses unsigned percentUncacheable; int id; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index da75bfecf..e7bf83b20 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1118,7 +1118,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> (&cacheData[tid][offset])); - ext_inst = TheISA::makeExtMI(inst, fetch_PC); + ext_inst = TheISA::makeExtMI(inst, cpu->tcBase(tid)); // Create a new DynInst from the instruction fetched. 
DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index eed6de6dd..36e87ec9c 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -883,7 +883,7 @@ FrontEnd<Impl>::getInstFromCacheline() // Get the instruction from the array of the cache line. inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset])); - ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC); + ExtMachInst decode_inst = TheISA::makeExtMI(inst, tc); // Create a new DynInst from the instruction fetched. DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 8307da521..dc58a8285 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -260,7 +260,7 @@ class OzoneLWLSQ { virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } virtual bool recvTiming(PacketPtr pkt); diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc index 8dd20db02..fa79e6d1e 100644 --- a/src/cpu/quiesce_event.cc +++ b/src/cpu/quiesce_event.cc @@ -28,6 +28,7 @@ * Authors: Kevin Lim */ +#include "cpu/base.hh" #include "cpu/thread_context.hh" #include "cpu/quiesce_event.hh" @@ -39,6 +40,7 @@ EndQuiesceEvent::EndQuiesceEvent(ThreadContext *_tc) void EndQuiesceEvent::process() { + DPRINTF(Quiesce, "activating %s\n", tc->getCpuPtr()->name()); tc->activate(); } diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 38a8ba097..edba55b0d 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -183,11 +183,14 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) void AtomicSimpleCPU::resume() { - changeState(SimObject::Running); - if (thread->status() == ThreadContext::Active) { + if (_status != SwitchedOut && 
_status != Idle) { assert(system->getMemoryMode() == System::Atomic); - if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + + changeState(SimObject::Running); + if (thread->status() == ThreadContext::Active) { + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + } } } @@ -448,7 +451,8 @@ AtomicSimpleCPU::tick() for (int i = 0; i < width; ++i) { numCycles++; - checkForInterrupts(); + if (!curStaticInst || !curStaticInst->isDelayedCommit()) + checkForInterrupts(); Fault fault = setupFetchRequest(ifetch_req); diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index a79d3c542..cbb3980cb 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -396,7 +396,20 @@ BaseSimpleCPU::preExecute() // decode the instruction inst = gtoh(inst); - curStaticInst = StaticInst::decode(makeExtMI(inst, thread->readPC())); + //If we're not in the middle of a macro instruction + if (!curMacroStaticInst) { + StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC())); + if (instPtr->isMacroOp()) { + curMacroStaticInst = instPtr; + curStaticInst = curMacroStaticInst->fetchMicroOp(0); + } else { + curStaticInst = instPtr; + } + } else { + //Read the next micro op from the macro op + curStaticInst = curMacroStaticInst->fetchMicroOp(thread->readMicroPC()); + } + traceData = Trace::getInstRecord(curTick, tc, curStaticInst, thread->readPC()); @@ -446,18 +459,35 @@ BaseSimpleCPU::advancePC(Fault fault) { if (fault != NoFault) { fault->invoke(tc); - } - else { - // go to the next instruction - thread->setPC(thread->readNextPC()); + } else { + //If we're at the last micro op for this instruction + if (curStaticInst->isLastMicroOp()) { + //We should be working with a macro op + assert(curMacroStaticInst); + //Close out this macro op, and clean up the + //microcode state + curMacroStaticInst = StaticInst::nullStaticInstPtr; + thread->setMicroPC(0); + thread->setNextMicroPC(1); + } + //If we're still in a macro op + if (curMacroStaticInst) { + 
//Advance the micro pc + thread->setMicroPC(thread->readNextMicroPC()); + //Advance the "next" micro pc. Note that there are no delay + //slots, and micro ops are "word" addressed. + thread->setNextMicroPC(thread->readNextMicroPC() + 1); + } else { + // go to the next instruction + thread->setPC(thread->readNextPC()); #if ISA_HAS_DELAY_SLOT - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); - assert(thread->readNextPC() != thread->readNextNPC()); + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + assert(thread->readNextPC() != thread->readNextNPC()); #else - thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); + thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); #endif - + } } #if FULL_SYSTEM diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 57cfa3c2c..af6b6f835 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -128,6 +128,7 @@ class BaseSimpleCPU : public BaseCPU TheISA::IntReg dataReg; StaticInstPtr curStaticInst; + StaticInstPtr curMacroStaticInst; void checkForInterrupts(); Fault setupFetchRequest(Request *req); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 97df0e5d5..fe6775ea4 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -427,7 +427,8 @@ TimingSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) void TimingSimpleCPU::fetch() { - checkForInterrupts(); + if (!curStaticInst || !curStaticInst->isDelayedCommit()) + checkForInterrupts(); Request *ifetch_req = new Request(); ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0); diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 6fa6500bd..fe22e6c43 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -377,6 +377,16 @@ class SimpleThread : public ThreadState regs.setPC(val); } + uint64_t readMicroPC() + { + return microPC; + } + + void 
setMicroPC(uint64_t val) + { + microPC = val; + } + uint64_t readNextPC() { return regs.readNextPC(); @@ -387,6 +397,16 @@ class SimpleThread : public ThreadState regs.setNextPC(val); } + uint64_t readNextMicroPC() + { + return nextMicroPC; + } + + void setNextMicroPC(uint64_t val) + { + nextMicroPC = val; + } + uint64_t readNextNPC() { return regs.readNextNPC(); diff --git a/src/cpu/static_inst.cc b/src/cpu/static_inst.cc index c311d2282..cb4a7cdf7 100644 --- a/src/cpu/static_inst.cc +++ b/src/cpu/static_inst.cc @@ -75,3 +75,10 @@ StaticInst::hasBranchTarget(Addr pc, ThreadContext *tc, Addr &tgt) const return false; } +StaticInstPtr +StaticInst::fetchMicroOp(MicroPC micropc) +{ + panic("StaticInst::fetchMicroOp() called on instruction " + "that is not microcoded."); +} + diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 578d14191..523cfae40 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -67,6 +67,8 @@ namespace Trace { class InstRecord; } +typedef uint32_t MicroPC; + /** * Base, ISA-independent static instruction class. 
* @@ -139,6 +141,14 @@ class StaticInstBase : public RefCounted IsIprAccess, ///< Accesses IPRs IsUnverifiable, ///< Can't be verified by a checker + //Flags for microcode + IsMacroOp, ///< Is a macroop containing microops + IsMicroOp, ///< Is a microop + IsDelayedCommit, ///< This microop doesn't commit right away + IsLastMicroOp, ///< This microop ends a microop sequence + //This flag doesn't do anything yet + IsMicroBranch, ///< This microop branches within the microcode for a macroop + NumFlags }; @@ -230,6 +240,12 @@ class StaticInstBase : public RefCounted bool isQuiesce() const { return flags[IsQuiesce]; } bool isIprAccess() const { return flags[IsIprAccess]; } bool isUnverifiable() const { return flags[IsUnverifiable]; } + bool isMacroOp() const { return flags[IsMacroOp]; } + bool isMicroOp() const { return flags[IsMicroOp]; } + bool isDelayedCommit() const { return flags[IsDelayedCommit]; } + bool isLastMicroOp() const { return flags[IsLastMicroOp]; } + //This flag doesn't do anything yet + bool isMicroBranch() const { return flags[IsMicroBranch]; } //@} /// Operation class. Used to select appropriate function unit in issue. @@ -347,6 +363,12 @@ class StaticInst : public StaticInstBase #include "cpu/static_inst_exec_sigs.hh" /** + * Return the microop that goes with a particular micropc. This should + * only be defined/used in macroops which will contain microops + */ + virtual StaticInstPtr fetchMicroOp(MicroPC micropc); + + /** * Return the target address for a PC-relative branch. * Invalid if not a PC-relative branch (i.e. isDirectCtrl() * should be true). 
diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc index 6a96560f1..c644ae8d7 100644 --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -42,13 +42,13 @@ ThreadState::ThreadState(int _cpuId, int _tid) : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0), profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL), - funcExeInst(0), storeCondFailures(0) + microPC(0), nextMicroPC(1), funcExeInst(0), storeCondFailures(0) #else ThreadState::ThreadState(int _cpuId, int _tid, Process *_process, short _asid, MemObject *mem) : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0), process(_process), asid(_asid), - funcExeInst(0), storeCondFailures(0) + microPC(0), nextMicroPC(1), funcExeInst(0), storeCondFailures(0) #endif { numInst = 0; diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index 14e033b7f..60353760c 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -200,6 +200,16 @@ struct ThreadState { */ TheISA::MachInst inst; + /** The current microcode pc for the currently executing macro + * operation. + */ + MicroPC microPC; + + /** The next microcode pc for the currently executing macro + * operation. + */ + MicroPC nextMicroPC; + public: /** * Temporary storage to pass the source address from copy_load to diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc index 4f1771ff9..23761cd10 100644 --- a/src/dev/isa_fake.cc +++ b/src/dev/isa_fake.cc @@ -61,6 +61,7 @@ IsaFake::read(PacketPtr pkt) DPRINTF(Tsunami, "read va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); switch (pkt->getSize()) { + case sizeof(uint64_t): pkt->set(0xFFFFFFFFFFFFFFFFULL); break; case sizeof(uint32_t): diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh index 3a2677642..6e0b37d91 100644 --- a/src/kern/linux/linux.hh +++ b/src/kern/linux/linux.hh @@ -54,11 +54,11 @@ class Linux : public OperatingSystem //@{ /// Basic Linux types. 
-/* typedef uint64_t size_t; + typedef uint64_t size_t; typedef uint64_t off_t; typedef int64_t time_t; typedef uint32_t uid_t; - typedef uint32_t gid_t;*/ + typedef uint32_t gid_t; //@} /// Stat buffer. Note that we can't call it 'stat' since that diff --git a/src/kern/solaris/solaris.hh b/src/kern/solaris/solaris.hh index a5ca41cdd..ed574fdbf 100644 --- a/src/kern/solaris/solaris.hh +++ b/src/kern/solaris/solaris.hh @@ -56,7 +56,7 @@ class Solaris : public OperatingSystem //@{ /// Basic Solaris types. -/* typedef uint64_t size_t; + typedef uint64_t size_t; typedef uint64_t off_t; typedef int64_t time_t; typedef int32_t uid_t; @@ -65,7 +65,7 @@ class Solaris : public OperatingSystem typedef uint64_t ino_t; typedef uint64_t dev_t; typedef uint32_t mode_t; - typedef uint32_t nlink_t;*/ + typedef uint32_t nlink_t; //@} struct tgt_timespec { diff --git a/src/kern/tru64/tru64.hh b/src/kern/tru64/tru64.hh index 18671c364..6d6d0d96d 100644 --- a/src/kern/tru64/tru64.hh +++ b/src/kern/tru64/tru64.hh @@ -393,34 +393,6 @@ class Tru64 : public OperatingSystem uint64_t pad2[2]; //!< pad2 }; - /*/// Helper function to convert a host stat buffer to a target stat - /// buffer. Also copies the target buffer out to the simulated - /// memory space. Used by stat(), fstat(), and lstat(). 
- template <class T> - static void - copyOutStatBuf(TranslatingPort *mem, Addr addr, global_stat *host) - { - using namespace TheISA; - - TypedBufferArg<T> tgt(addr); - - tgt->st_dev = htog(host->st_dev); - tgt->st_ino = htog(host->st_ino); - tgt->st_mode = htog(host->st_mode); - tgt->st_nlink = htog(host->st_nlink); - tgt->st_uid = htog(host->st_uid); - tgt->st_gid = htog(host->st_gid); - tgt->st_rdev = htog(host->st_rdev); - tgt->st_size = htog(host->st_size); - tgt->st_atimeX = htog(host->st_atime); - tgt->st_mtimeX = htog(host->st_mtime); - tgt->st_ctimeX = htog(host->st_ctime); - tgt->st_blksize = htog(host->st_blksize); - tgt->st_blocks = htog(host->st_blocks); - - tgt.copyOut(mem); - }*/ - /// Helper function to convert a host statfs buffer to a target statfs /// buffer. Also copies the target buffer out to the simulated /// memory space. Used by statfs() and fstatfs(). @@ -450,34 +422,6 @@ class Tru64 : public OperatingSystem tgt.copyOut(mem); } -/* /// Helper function to convert a host stat buffer to an old pre-F64 - /// (4.x) target stat buffer. Also copies the target buffer out to - /// the simulated memory space. Used by pre_F64_stat(), - /// pre_F64_fstat(), and pre_F64_lstat(). - static void - copyOutPreF64StatBuf(TranslatingPort *mem, Addr addr, struct stat *host) - { - using namespace TheISA; - - TypedBufferArg<Tru64::pre_F64_stat> tgt(addr); - - tgt->st_dev = htog(host->st_dev); - tgt->st_ino = htog(host->st_ino); - tgt->st_mode = htog(host->st_mode); - tgt->st_nlink = htog(host->st_nlink); - tgt->st_uid = htog(host->st_uid); - tgt->st_gid = htog(host->st_gid); - tgt->st_rdev = htog(host->st_rdev); - tgt->st_size = htog(host->st_size); - tgt->st_atimeX = htog(host->st_atime); - tgt->st_mtimeX = htog(host->st_mtime); - tgt->st_ctimeX = htog(host->st_ctime); - tgt->st_blksize = htog(host->st_blksize); - tgt->st_blocks = htog(host->st_blocks); - - tgt.copyOut(mem); - }*/ - /// The target system's hostname. 
static const char *hostname; @@ -600,12 +544,19 @@ class Tru64 : public OperatingSystem process->next_thread_stack_base -= stack_size; } - stack_base = roundDown(stack_base, VMPageSize); + Addr rounded_stack_base = roundDown(stack_base, VMPageSize); + Addr rounded_stack_size = roundUp(stack_size, VMPageSize); + + DPRINTF(SyscallVerbose, + "stack_create: allocating stack @ %#x size %#x " + "(rounded from %#x, %#x)\n", + rounded_stack_base, rounded_stack_size, + stack_base, stack_size); // map memory - process->pTable->allocate(stack_base, roundUp(stack_size, VMPageSize)); + process->pTable->allocate(rounded_stack_base, rounded_stack_size); - argp->address = gtoh(stack_base); + argp->address = gtoh(rounded_stack_base); argp.copyOut(tc->getMemPort()); return 0; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 6cbeef5a4..599958222 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -79,9 +79,7 @@ BaseCache::CachePort::recvTiming(PacketPtr pkt) && !pkt->isRead() && !pkt->isWrite()) { //Upgrade or Invalidate //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %x ? 
blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), - pkt->getAddr() & ~((Addr)cache->blkSize - 1)); + DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr()); assert(!(pkt->flags & SATISFIED)); pkt->flags |= SATISFIED; @@ -115,32 +113,7 @@ BaseCache::CachePort::recvFunctional(PacketPtr pkt) // If the target contains data, and it overlaps the // probed request, need to update data if (target->intersect(pkt)) { - uint8_t* pkt_data; - uint8_t* write_data; - int data_size; - if (target->getAddr() < pkt->getAddr()) { - int offset = pkt->getAddr() - target->getAddr(); - pkt_data = pkt->getPtr<uint8_t>(); - write_data = target->getPtr<uint8_t>() + offset; - data_size = target->getSize() - offset; - assert(data_size > 0); - if (data_size > pkt->getSize()) - data_size = pkt->getSize(); - } else { - int offset = target->getAddr() - pkt->getAddr(); - pkt_data = pkt->getPtr<uint8_t>() + offset; - write_data = target->getPtr<uint8_t>(); - data_size = pkt->getSize() - offset; - assert(data_size >= pkt->getSize()); - if (data_size > target->getSize()) - data_size = target->getSize(); - } - - if (pkt->isWrite()) { - memcpy(pkt_data, write_data, data_size); - } else { - memcpy(write_data, pkt_data, data_size); - } + fixPacket(pkt, target); } } cache->doFunctionalAccess(pkt, isCpuSide); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index dcb0e7b78..9bb72e85c 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -205,9 +205,10 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); } - DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->getAddr() & ~((Addr)blkSize - 1)); + + DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), + (blk) ? 
"hit" : "miss"); + if (blk) { // Hit hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; @@ -231,8 +232,16 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) exitSimLoop("A cache reached the maximum miss count"); } } - missQueue->handleMiss(pkt, size, curTick + hitLatency); -// return MA_CACHE_MISS; + + if (pkt->flags & SATISFIED) { + // happens when a store conditional fails because it missed + // the cache completely + if (pkt->needsResponse()) + respond(pkt, curTick+lat); + } else { + missQueue->handleMiss(pkt, size, curTick + hitLatency); + } + return true; } @@ -280,10 +289,8 @@ Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, CacheBlk::State old_state = (blk) ? blk->status : 0; CacheBlk::State new_state = coherence->getNewState(pkt,old_state); if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from " - "state %i to %i\n", - pkt->getAddr() & (((ULL(1))<<48)-1), - old_state, new_state); + DPRINTF(Cache, "Block for blk addr %x moving from state " + "%i to %i\n", pkt->getAddr(), old_state, new_state); //Set the state on the upgrade memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize); PacketList writebacks; @@ -323,8 +330,7 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(PacketPtr &pkt) //Make the response a Bad address and send it } // MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), - pkt->getAddr() & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr()); if (pkt->isCacheFill() && !pkt->isNoAllocate()) { blk = tags->findBlock(pkt); @@ -334,7 +340,7 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(PacketPtr &pkt) if (old_state != new_state) DPRINTF(Cache, "Block for blk addr %x moving from " "state %i to %i\n", - pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr(), old_state, new_state); blk = tags->handleFill(blk, (MSHR*)pkt->senderState, new_state, writebacks, pkt); @@ -419,8 +425,8 @@ 
Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt) //Append the invalidate on missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", - pkt->getAddr() & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Appending Invalidate to addr: %x\n", + pkt->getAddr()); return; } } @@ -428,8 +434,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt) //We also need to check the writeback buffers and handle those std::vector<MSHR *> writebacks; if (missQueue->findWrites(blk_addr, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", - pkt->getAddr() & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n", + pkt->getAddr()); //Look through writebacks for any non-uncachable writes, use that for (int i=0; i<writebacks.size(); i++) { @@ -520,12 +526,9 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, { // MemDebug::cacheProbe(pkt); if (!pkt->req->isUncacheable()) { - if (pkt->isInvalidate() && !pkt->isRead() - && !pkt->isWrite()) { + if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward - DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), - pkt->getAddr() & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr()); pkt->flags |= SATISFIED; return 0; } @@ -542,9 +545,8 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); - DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->getAddr() & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), + pkt->getAddr(), (blk) ? 
"hit" : "miss"); // Need to check for outstanding misses and writes @@ -560,7 +562,6 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, if (!update) { // Check for data in MSHR and writebuffer. if (mshr) { - warn("Found outstanding miss on an non-update probe"); MSHR::TargetList *targets = mshr->getTargetList(); MSHR::TargetList::iterator i = targets->begin(); MSHR::TargetList::iterator end = targets->end(); @@ -568,71 +569,15 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, PacketPtr target = *i; // If the target contains data, and it overlaps the // probed request, need to update data - if (target->isWrite() && target->intersect(pkt)) { - uint8_t* pkt_data; - uint8_t* write_data; - int data_size; - if (target->getAddr() < pkt->getAddr()) { - int offset = pkt->getAddr() - target->getAddr(); - pkt_data = pkt->getPtr<uint8_t>(); - write_data = target->getPtr<uint8_t>() + offset; - data_size = target->getSize() - offset; - assert(data_size > 0); - if (data_size > pkt->getSize()) - data_size = pkt->getSize(); - } else { - int offset = target->getAddr() - pkt->getAddr(); - pkt_data = pkt->getPtr<uint8_t>() + offset; - write_data = target->getPtr<uint8_t>(); - data_size = pkt->getSize() - offset; - assert(data_size >= pkt->getSize()); - if (data_size > target->getSize()) - data_size = target->getSize(); - } - - if (pkt->isWrite()) { - memcpy(pkt_data, write_data, data_size); - } else { - pkt->flags |= SATISFIED; - pkt->result = Packet::Success; - memcpy(write_data, pkt_data, data_size); - } + if (target->intersect(pkt)) { + fixPacket(pkt, target); } } } for (int i = 0; i < writes.size(); ++i) { PacketPtr write = writes[i]->pkt; if (write->intersect(pkt)) { - warn("Found outstanding write on an non-update probe"); - uint8_t* pkt_data; - uint8_t* write_data; - int data_size; - if (write->getAddr() < pkt->getAddr()) { - int offset = pkt->getAddr() - write->getAddr(); - pkt_data = pkt->getPtr<uint8_t>(); - write_data = 
write->getPtr<uint8_t>() + offset; - data_size = write->getSize() - offset; - assert(data_size > 0); - if (data_size > pkt->getSize()) - data_size = pkt->getSize(); - } else { - int offset = write->getAddr() - pkt->getAddr(); - pkt_data = pkt->getPtr<uint8_t>() + offset; - write_data = write->getPtr<uint8_t>(); - data_size = pkt->getSize() - offset; - assert(data_size >= pkt->getSize()); - if (data_size > write->getSize()) - data_size = write->getSize(); - } - - if (pkt->isWrite()) { - memcpy(pkt_data, write_data, data_size); - } else { - pkt->flags |= SATISFIED; - pkt->result = Packet::Success; - memcpy(write_data, pkt_data, data_size); - } - + fixPacket(pkt, write); } } if (pkt->isRead() @@ -642,10 +587,10 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, assert(pkt->result == Packet::Success); } return 0; - } else if (!blk) { + } else if (!blk && !(pkt->flags & SATISFIED)) { // update the cache state and statistics if (mshr || !writes.empty()){ - // Can't handle it, return pktuest unsatisfied. + // Can't handle it, return request unsatisfied. panic("Atomic access ran into outstanding MSHR's or WB's!"); } if (!pkt->req->isUncacheable()) { @@ -660,10 +605,8 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, busPkt->time = curTick; - DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n", - busPkt->cmdString(), - busPkt->getAddr() & (((ULL(1))<<48)-1), - busPkt->getAddr() & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "Sending a atomic %s for %x\n", + busPkt->cmdString(), busPkt->getAddr()); lat = memSidePort->sendAtomic(busPkt); @@ -682,19 +625,13 @@ return 0; CacheBlk::State old_state = (blk) ? 
blk->status : 0; CacheBlk::State new_state = coherence->getNewState(busPkt, old_state); - DPRINTF(Cache, - "Receive response:%s for blk addr %x in state %i\n", - busPkt->cmdString(), - busPkt->getAddr() & (((ULL(1))<<48)-1), old_state); + DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", + busPkt->cmdString(), busPkt->getAddr(), old_state); if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from " - "state %i to %i\n", - busPkt->getAddr() & (((ULL(1))<<48)-1), - old_state, new_state); - - tags->handleFill(blk, busPkt, - new_state, - writebacks, pkt); + DPRINTF(Cache, "Block for blk addr %x moving from state " + "%i to %i\n", busPkt->getAddr(), old_state, new_state); + + tags->handleFill(blk, busPkt, new_state, writebacks, pkt); //Free the packet delete busPkt; @@ -710,18 +647,20 @@ return 0; return memSidePort->sendAtomic(pkt); } } else { - // There was a cache hit. - // Handle writebacks if needed - while (!writebacks.empty()){ - memSidePort->sendAtomic(writebacks.front()); - writebacks.pop_front(); - } + if (blk) { + // There was a cache hit. 
+ // Handle writebacks if needed + while (!writebacks.empty()){ + memSidePort->sendAtomic(writebacks.front()); + writebacks.pop_front(); + } - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + } return hitLatency; } - fatal("Probe not handled.\n"); + return 0; } diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 52beb0880..3d7721805 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -206,8 +206,7 @@ bool CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt, CacheBlk *blk, MSHR *mshr, - CacheBlk::State & new_state - ) + CacheBlk::State & new_state) { return true; } @@ -263,182 +262,106 @@ CoherenceProtocol::CoherenceProtocol(const string &name, const bool doUpgrades) : SimObject(name) { - if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) { - cerr << "CoherenceProtocol: ownership protocols require upgrade transactions" - << "(write miss on owned block generates ReadExcl, which will clobber dirty block)" - << endl; - fatal(""); + // Python should catch this, but in case it doesn't... + if (!(protocol == "msi" || protocol == "mesi" || + protocol == "mosi" || protocol == "moesi")) { + fatal("CoherenceProtocol: unrecognized protocol %s\n", protocol); } - Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; - Packet::Command writeToSharedResp = doUpgrades ? 
Packet::UpgradeReq : Packet::ReadExResp; - -//@todo add in hardware prefetch to this list - if (protocol == "msi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); - //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - - if (doUpgrades) { - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - 
transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - } + bool hasOwned = (protocol == "mosi" || protocol == "moesi"); + bool hasExclusive = (protocol == "mesi" || protocol == "moesi"); + + if (hasOwned && !doUpgrades) { + fatal("CoherenceProtocol: ownership protocols require upgrade " + "transactions\n(write miss on owned block generates ReadExcl, " + "which will clobber dirty block)\n"); } - else if(protocol == "mesi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); - //It will move into shared if the shared line is asserted in the - //getNewState function - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); - //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - 
transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - - if (doUpgrades) { - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - } + // set up a few shortcuts to save typing & visual clutter + typedef Packet P; + StateTransition (&tt)[stateMax+1][NUM_MEM_CMDS] = transitionTable; + + P::Command writeToSharedCmd = doUpgrades ? P::UpgradeReq : P::ReadExReq; + P::Command writeToSharedResp = doUpgrades ? P::UpgradeReq : P::ReadExResp; + + // Note that all transitions by default cause a panic. + // Override the valid transitions with the appropriate actions here. 
+ + // + // ----- incoming requests: specify outgoing bus request ----- + // + tt[Invalid][P::ReadReq].onRequest(P::ReadReq); + // we only support write allocate right now + tt[Invalid][P::WriteReq].onRequest(P::ReadExReq); + tt[Shared][P::WriteReq].onRequest(writeToSharedCmd); + if (hasOwned) { + tt[Owned][P::WriteReq].onRequest(writeToSharedCmd); } - else if(protocol == "mosi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - transitionTable[Owned][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - 
transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); - //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + // Prefetching causes a read + tt[Invalid][P::SoftPFReq].onRequest(P::ReadReq); + tt[Invalid][P::HardPFReq].onRequest(P::ReadReq); + + // + // ----- on response to given request: specify new state ----- + // + tt[Invalid][P::ReadExResp].onResponse(Modified); + tt[Shared][writeToSharedResp].onResponse(Modified); + // Go to Exclusive state on read response if we have one (will + // move into shared if the shared line is asserted in the + // getNewState function) + // + // originally had this as: + // tt[Invalid][P::ReadResp].onResponse(hasExclusive ? Exclusive: Shared); + // ...but for some reason that caused a link error... 
+ if (hasExclusive) { + tt[Invalid][P::ReadResp].onResponse(Exclusive); + } else { + tt[Invalid][P::ReadResp].onResponse(Shared); + } + if (hasOwned) { + tt[Owned][writeToSharedResp].onResponse(Modified); } - else if(protocol == "moesi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); - //It will move into shared if the shared line is asserted in the - //getNewState function - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - transitionTable[Owned][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - 
transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); - //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + // + // ----- bus snoop transition functions ----- + // + tt[Invalid][P::ReadReq].onSnoop(nullTransition); + tt[Invalid][P::ReadExReq].onSnoop(nullTransition); + tt[Invalid][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Invalid][P::WriteInvalidateReq].onSnoop(invalidateTrans); + tt[Shared][P::ReadReq].onSnoop(hasExclusive + ? assertShared : nullTransition); + tt[Shared][P::ReadExReq].onSnoop(invalidateTrans); + tt[Shared][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Shared][P::WriteInvalidateReq].onSnoop(invalidateTrans); + if (doUpgrades) { + tt[Invalid][P::UpgradeReq].onSnoop(nullTransition); + tt[Shared][P::UpgradeReq].onSnoop(invalidateTrans); + } + tt[Modified][P::ReadExReq].onSnoop(supplyAndInvalidateTrans); + tt[Modified][P::ReadReq].onSnoop(hasOwned + ? 
supplyAndGotoOwnedTrans + : supplyAndGotoSharedTrans); + tt[Modified][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Modified][P::WriteInvalidateReq].onSnoop(invalidateTrans); + + if (hasExclusive) { + tt[Exclusive][P::ReadReq].onSnoop(assertShared); + tt[Exclusive][P::ReadExReq].onSnoop(invalidateTrans); + tt[Exclusive][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Exclusive][P::WriteInvalidateReq].onSnoop(invalidateTrans); } - else { - cerr << "CoherenceProtocol: unrecognized protocol " << protocol - << endl; - fatal(""); + if (hasOwned) { + tt[Owned][P::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + tt[Owned][P::ReadExReq].onSnoop(supplyAndInvalidateTrans); + tt[Owned][P::UpgradeReq].onSnoop(invalidateTrans); + tt[Owned][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Owned][P::WriteInvalidateReq].onSnoop(invalidateTrans); } + + // @todo add in hardware prefetch to this list } diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh index b30fb053b..481277523 100644 --- a/src/mem/cache/coherence/coherence_protocol.hh +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -211,31 +211,25 @@ class CoherenceProtocol : public SimObject friend class CoherenceProtocol::StateTransition; /** Mask to select status bits relevant to coherence protocol. */ - const static CacheBlk::State - stateMask = BlkValid | BlkWritable | BlkDirty; + static const int stateMask = BlkValid | BlkWritable | BlkDirty; /** The Modified (M) state. */ - const static CacheBlk::State - Modified = BlkValid | BlkWritable | BlkDirty; + static const int Modified = BlkValid | BlkWritable | BlkDirty; /** The Owned (O) state. */ - const static CacheBlk::State - Owned = BlkValid | BlkDirty; + static const int Owned = BlkValid | BlkDirty; /** The Exclusive (E) state. */ - const static CacheBlk::State - Exclusive = BlkValid | BlkWritable; + static const int Exclusive = BlkValid | BlkWritable; /** The Shared (S) state. 
*/ - const static CacheBlk::State - Shared = BlkValid; + static const int Shared = BlkValid; /** The Invalid (I) state. */ - const static CacheBlk::State - Invalid = 0; + static const int Invalid = 0; /** * Maximum state encoding value (used to size transition lookup * table). Could be more than number of states, depends on * encoding of status bits. */ - const static int stateMax = stateMask; + static const int stateMax = stateMask; /** * The table of all possible transitions, organized by starting state and diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 4408cfc4f..934a843a6 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -128,8 +128,8 @@ public: } /** - * Selects a outstanding pktuest to service. - * @return The pktuest to service, NULL if none found. + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. */ PacketPtr getPacket(); @@ -147,7 +147,7 @@ public: void restoreOrigCmd(PacketPtr &pkt); /** - * Marks a pktuest as in service (sent on the bus). This can have side + * Marks a request as in service (sent on the bus). This can have side * effect since storage for no response commands is deallocated once they * are successfully sent. * @param pkt The request that was sent on the bus. @@ -155,14 +155,14 @@ public: void markInService(PacketPtr &pkt, MSHR* mshr); /** - * Frees the resources of the pktuest and unblock the cache. + * Frees the resources of the request and unblock the cache. * @param pkt The request that has been satisfied. - * @param time The time when the pktuest is satisfied. + * @param time The time when the request is satisfied. */ void handleResponse(PacketPtr &pkt, Tick time); /** - * Removes all outstanding pktuests for a given thread number. If a request + * Removes all outstanding requests for a given thread number. 
If a request * has been sent to the bus, this function removes all of its targets. * @param threadNum The thread number of the requests to squash. */ @@ -220,14 +220,14 @@ public: int size, uint8_t *data, bool compressed); /** - * Perform a writeback pktuest. + * Perform a writeback request. * @param pkt The writeback request. */ void doWriteback(PacketPtr &pkt); /** - * Returns true if there are outstanding pktuests. - * @return True if there are outstanding pktuests. + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. */ bool havePending() { diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index 2e04802fb..b67a896f4 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -77,7 +77,7 @@ class MissQueue /** The block size of the parent cache. */ int blkSize; - /** Increasing order number assigned to each incoming pktuest. */ + /** Increasing order number assigned to each incoming request. */ uint64_t order; bool prefetchMiss; @@ -212,7 +212,7 @@ class MissQueue void setPrefetcher(BasePrefetcher *_prefetcher); /** - * Handle a cache miss properly. Either allocate an MSHR for the pktuest, + * Handle a cache miss properly. Either allocate an MSHR for the request, * or forward it through the write buffer. * @param pkt The request that missed in the cache. * @param blk_size The block size of the cache. @@ -232,8 +232,8 @@ class MissQueue PacketPtr &target); /** - * Selects a outstanding pktuest to service. - * @return The pktuest to service, NULL if none found. + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. */ PacketPtr getPacket(); @@ -251,7 +251,7 @@ class MissQueue void restoreOrigCmd(PacketPtr &pkt); /** - * Marks a pktuest as in service (sent on the bus). This can have side + * Marks a request as in service (sent on the bus). 
This can have side * effect since storage for no response commands is deallocated once they * are successfully sent. * @param pkt The request that was sent on the bus. @@ -259,14 +259,14 @@ class MissQueue void markInService(PacketPtr &pkt, MSHR* mshr); /** - * Collect statistics and free resources of a satisfied pktuest. + * Collect statistics and free resources of a satisfied request. * @param pkt The request that has been satisfied. - * @param time The time when the pktuest is satisfied. + * @param time The time when the request is satisfied. */ void handleResponse(PacketPtr &pkt, Tick time); /** - * Removes all outstanding pktuests for a given thread number. If a request + * Removes all outstanding requests for a given thread number. If a request * has been sent to the bus, this function removes all of its targets. * @param threadNum The thread number of the requests to squash. */ @@ -313,14 +313,14 @@ class MissQueue int size, uint8_t *data, bool compressed); /** - * Perform the given writeback pktuest. + * Perform the given writeback request. * @param pkt The writeback request. */ void doWriteback(PacketPtr &pkt); /** - * Returns true if there are outstanding pktuests. - * @return True if there are outstanding pktuests. + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. */ bool havePending(); diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index d92aa8a85..281ea9d49 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -44,7 +44,7 @@ class MSHR; /** * Miss Status and handling Register. This class keeps all the information - * needed to handle a cache miss including a list of target pktuests. + * needed to handle a cache miss including a list of target requests. */ class MSHR { public: @@ -63,15 +63,15 @@ class MSHR { Addr addr; /** Adress space id of the miss. */ short asid; - /** True if the pktuest has been sent to the bus. 
*/ + /** True if the request has been sent to the bus. */ bool inService; /** Thread number of the miss. */ int threadNum; - /** The pktuest that is forwarded to the next level of the hierarchy. */ + /** The request that is forwarded to the next level of the hierarchy. */ PacketPtr pkt; /** The number of currently allocated targets. */ short ntargets; - /** The original pktuesting command. */ + /** The original requesting command. */ Packet::Command originalCmd; /** Order number of assigned by the miss queue. */ uint64_t order; @@ -88,24 +88,24 @@ class MSHR { Iterator allocIter; private: - /** List of all pktuests that match the address */ + /** List of all requests that match the address */ TargetList targets; public: /** * Allocate a miss to this MSHR. - * @param cmd The pktuesting command. + * @param cmd The requesting command. * @param addr The address of the miss. * @param asid The address space id of the miss. - * @param size The number of bytes to pktuest. + * @param size The number of bytes to request. * @param pkt The original miss. */ void allocate(Packet::Command cmd, Addr addr, int size, PacketPtr &pkt); /** - * Allocate this MSHR as a buffer for the given pktuest. - * @param target The memory pktuest to buffer. + * Allocate this MSHR as a buffer for the given request. + * @param target The memory request to buffer. */ void allocateAsBuffer(PacketPtr &target); @@ -115,7 +115,7 @@ public: void deallocate(); /** - * Add a pktuest to the list of targets. + * Add a request to the list of targets. * @param target The target. */ void allocateTarget(PacketPtr &target); diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 30397d9a0..ec2ddae8a 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -39,7 +39,7 @@ #include "mem/cache/miss/mshr.hh" /** - * A Class for maintaining a list of pending and allocated memory pktuests. 
+ * A Class for maintaining a list of pending and allocated memory requests. */ class MSHRQueue { private: @@ -55,7 +55,7 @@ class MSHRQueue { // Parameters /** * The total number of MSHRs in this queue. This number is set as the - * number of MSHRs pktuested plus (numReserve - 1). This allows for + * number of MSHRs requested plus (numReserve - 1). This allows for * the same number of effective MSHRs while still maintaining the reserve. */ const int numMSHRs; @@ -103,14 +103,14 @@ class MSHRQueue { bool findMatches(Addr addr, std::vector<MSHR*>& matches) const; /** - * Find any pending pktuests that overlap the given request. + * Find any pending requests that overlap the given request. * @param pkt The request to find. * @return A pointer to the earliest matching MSHR. */ MSHR* findPending(PacketPtr &pkt) const; /** - * Allocates a new MSHR for the pktuest and size. This places the request + * Allocates a new MSHR for the request and size. This places the request * as the first target in the MSHR. * @param pkt The request to handle. * @param size The number in bytes to fetch from memory. @@ -121,12 +121,12 @@ class MSHRQueue { MSHR* allocate(PacketPtr &pkt, int size = 0); /** - * Allocate a read pktuest for the given address, and places the given + * Allocate a read request for the given address, and places the given * target on the target list. * @param addr The address to fetch. * @param asid The address space for the fetch. - * @param size The number of bytes to pktuest. - * @param target The first target for the pktuest. + * @param size The number of bytes to request. + * @param target The first target for the request. * @return Pointer to the new MSHR. */ MSHR* allocateFetch(Addr addr, int size, PacketPtr &target); @@ -135,7 +135,7 @@ class MSHRQueue { * Allocate a target list for the given address. * @param addr The address to fetch. * @param asid The address space for the fetch. - * @param size The number of bytes to pktuest. 
+ * @param size The number of bytes to request. * @return Pointer to the new MSHR. */ MSHR* allocateTargetList(Addr addr, int size); @@ -181,14 +181,14 @@ class MSHRQueue { void markInService(MSHR* mshr); /** - * Mark an in service mshr as pending, used to resend a pktuest. + * Mark an in service mshr as pending, used to resend a request. * @param mshr The MSHR to resend. * @param cmd The command to resend. */ void markPending(MSHR* mshr, Packet::Command cmd); /** - * Squash outstanding pktuests with the given thread number. If a request + * Squash outstanding requests with the given thread number. If a request * is in service, just squashes the targets. * @param threadNum The thread to squash. */ @@ -196,7 +196,7 @@ class MSHRQueue { /** * Returns true if the pending list is not empty. - * @return True if there are outstanding pktuests. + * @return True if there are outstanding requests. */ bool havePending() const { @@ -213,8 +213,8 @@ class MSHRQueue { } /** - * Returns the pktuest at the head of the pendingList. - * @return The next pktuest to service. + * Returns the request at the head of the pendingList. + * @return The next request to service. 
*/ PacketPtr getReq() const { diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh index 748f6fb25..898d3c7a0 100644 --- a/src/mem/cache/tags/split.hh +++ b/src/mem/cache/tags/split.hh @@ -71,13 +71,13 @@ class Split : public BaseTags Addr blkMask; - /** Number of NIC pktuests that hit in the NIC partition */ + /** Number of NIC requests that hit in the NIC partition */ Stats::Scalar<> NR_NP_hits; - /** Number of NIC pktuests that hit in the CPU partition */ + /** Number of NIC requests that hit in the CPU partition */ Stats::Scalar<> NR_CP_hits; - /** Number of CPU pktuests that hit in the NIC partition */ + /** Number of CPU requests that hit in the NIC partition */ Stats::Scalar<> CR_NP_hits; - /** Number of CPU pktuests that hit in the CPU partition */ + /** Number of CPU requests that hit in the CPU partition */ Stats::Scalar<> CR_CP_hits; /** The number of nic replacements (i.e. misses) */ Stats::Scalar<> nic_repl; diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh index 64d903579..f38516180 100644 --- a/src/mem/cache/tags/split_blk.hh +++ b/src/mem/cache/tags/split_blk.hh @@ -47,7 +47,7 @@ class SplitBlk : public CacheBlk { bool isTouched; /** Has this block been used after being brought in? (for LIFO partition) */ bool isUsed; - /** is this blk a NIC block? (i.e. pktuested by the NIC) */ + /** is this blk a NIC block? (i.e. 
requested by the NIC) */ bool isNIC; /** timestamp of the arrival of this block into the cache */ Tick ts; diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 834f4b63c..fa8d82c46 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -151,7 +151,7 @@ fixPacket(PacketPtr func, PacketPtr timing) Addr timingStart = timing->getAddr(); Addr timingEnd = timing->getAddr() + timing->getSize() - 1; - assert(!(funcStart > timingEnd || timingStart < funcEnd)); + assert(!(funcStart > timingEnd || timingStart > funcEnd)); if (DTRACE(FunctionalAccess)) { DebugOut() << func; diff --git a/src/mem/packet.hh b/src/mem/packet.hh index d8ad49bdb..cb97dd036 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -51,16 +51,16 @@ typedef uint8_t* PacketDataPtr; typedef std::list<PacketPtr> PacketList; //Coherence Flags -#define NACKED_LINE 1 << 0 -#define SATISFIED 1 << 1 -#define SHARED_LINE 1 << 2 -#define CACHE_LINE_FILL 1 << 3 -#define COMPRESSED 1 << 4 -#define NO_ALLOCATE 1 << 5 -#define SNOOP_COMMIT 1 << 6 +#define NACKED_LINE (1 << 0) +#define SATISFIED (1 << 1) +#define SHARED_LINE (1 << 2) +#define CACHE_LINE_FILL (1 << 3) +#define COMPRESSED (1 << 4) +#define NO_ALLOCATE (1 << 5) +#define SNOOP_COMMIT (1 << 6) //for now. @todo fix later -#define NUM_MEM_CMDS 1 << 11 +#define NUM_MEM_CMDS (1 << 11) /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -172,17 +172,17 @@ class Packet // as well. 
enum CommandAttribute { - IsRead = 1 << 0, - IsWrite = 1 << 1, - IsPrefetch = 1 << 2, - IsInvalidate = 1 << 3, - IsRequest = 1 << 4, - IsResponse = 1 << 5, - NeedsResponse = 1 << 6, + IsRead = 1 << 0, + IsWrite = 1 << 1, + IsPrefetch = 1 << 2, + IsInvalidate = 1 << 3, + IsRequest = 1 << 4, + IsResponse = 1 << 5, + NeedsResponse = 1 << 6, IsSWPrefetch = 1 << 7, IsHWPrefetch = 1 << 8, IsUpgrade = 1 << 9, - HasData = 1 << 10 + HasData = 1 << 10 }; public: @@ -190,27 +190,27 @@ class Packet enum Command { InvalidCmd = 0, - ReadReq = IsRead | IsRequest | NeedsResponse, + ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse | HasData, - WriteReqNoAck = IsWrite | IsRequest | HasData, + WriteReqNoAck = IsWrite | IsRequest | HasData, ReadResp = IsRead | IsResponse | NeedsResponse | HasData, - WriteResp = IsWrite | IsResponse | NeedsResponse, + WriteResp = IsWrite | IsResponse | NeedsResponse, Writeback = IsWrite | IsRequest | HasData, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, SoftPFResp = IsRead | IsResponse | IsSWPrefetch - | NeedsResponse | HasData, + | NeedsResponse | HasData, HardPFResp = IsRead | IsResponse | IsHWPrefetch - | NeedsResponse | HasData, + | NeedsResponse | HasData, InvalidateReq = IsInvalidate | IsRequest, - WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest - | HasData | NeedsResponse, - WriteInvalidateResp = IsWrite | IsInvalidate | IsRequest | NeedsResponse - | IsResponse, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest + | HasData | NeedsResponse, + WriteInvalidateResp = IsWrite | IsInvalidate | IsRequest + | NeedsResponse | IsResponse, UpgradeReq = IsInvalidate | IsRequest | IsUpgrade, ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, ReadExResp = IsRead | IsInvalidate | IsResponse - | NeedsResponse | HasData + | NeedsResponse | HasData }; /** Return the string name of the cmd field (for debugging and 
@@ -310,6 +310,7 @@ class Packet * multiple transactions. */ void reinitFromRequest() { assert(req->validPaddr); + flags = 0; addr = req->paddr; size = req->size; time = req->time; diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 350b4c8e3..55a461a8b 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -33,12 +33,10 @@ void SimpleTimingPort::recvFunctional(PacketPtr pkt) { - //First check queued events std::list<PacketPtr>::iterator i = transmitList.begin(); std::list<PacketPtr>::iterator end = transmitList.end(); - bool cont = true; - while (i != end && cont) { + while (i != end) { PacketPtr target = *i; // If the target contains data, and it overlaps the // probed request, need to update data @@ -46,8 +44,9 @@ SimpleTimingPort::recvFunctional(PacketPtr pkt) fixPacket(pkt, target); } + //Then just do an atomic access and throw away the returned latency - if (cont) + if (pkt->result != Packet::Success) recvAtomic(pkt); } diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 03e0508fb..d41fd5a61 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -150,7 +150,6 @@ def changeToAtomic(system): doDrain(system) print "Changing memory mode to atomic" system.changeTiming(cc_main.SimObject.Atomic) - resume(system) def changeToTiming(system): if not isinstance(system, objects.Root) and not isinstance(system, objects.System): @@ -159,7 +158,6 @@ def changeToTiming(system): doDrain(system) print "Changing memory mode to timing" system.changeTiming(cc_main.SimObject.Timing) - resume(system) def switchCpus(cpuList): print "switching cpus" @@ -190,7 +188,6 @@ def switchCpus(cpuList): cc_main.cleanupCountedDrain(drain_event) # Now all of the CPUs are ready to be switched out for old_cpu in old_cpus: - print "switching" old_cpu._ccObject.switchOut() index = 0 for new_cpu in new_cpus: diff --git a/src/python/m5/objects/MemTest.py b/src/python/m5/objects/MemTest.py index 83399be80..1219ddd4d 100644 --- 
a/src/python/m5/objects/MemTest.py +++ b/src/python/m5/objects/MemTest.py @@ -13,6 +13,7 @@ class MemTest(SimObject): percent_reads = Param.Percent(65, "target read percentage") percent_source_unaligned = Param.Percent(50, "percent of copy source address that are unaligned") + percent_functional = Param.Percent(50, "percent of access that are functional") percent_uncacheable = Param.Percent(10, "target uncacheable percentage") progress_interval = Param.Counter(1000000, diff --git a/src/sim/faults.cc b/src/sim/faults.cc index 650b728f7..cea35482a 100644 --- a/src/sim/faults.cc +++ b/src/sim/faults.cc @@ -37,7 +37,7 @@ #if !FULL_SYSTEM void FaultBase::invoke(ThreadContext * tc) { - fatal("fault (%s) detected @ PC 0x%08p", name(), tc->readPC()); + fatal("fault (%s) detected @ PC %p", name(), tc->readPC()); } #else void FaultBase::invoke(ThreadContext * tc) diff --git a/src/sim/main.cc b/src/sim/main.cc index 8bb0d7aaa..133141e57 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -55,6 +55,7 @@ #include "base/statistics.hh" #include "base/str.hh" #include "base/time.hh" +#include "config/pythonhome.hh" #include "cpu/base.hh" #include "cpu/smt.hh" #include "mem/mem_object.hh" @@ -145,6 +146,11 @@ main(int argc, char **argv) if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1) fatal("setenv: %s\n", strerror(errno)); + char *python_home = getenv("PYTHONHOME"); + if (!python_home) + python_home = PYTHONHOME; + Py_SetPythonHome(python_home); + // initialize embedded Python interpreter Py_Initialize(); PySys_SetArgv(argc, argv); diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index addf897c6..d913e159b 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -74,6 +74,8 @@ namespace AlphaPseudo if (!doQuiesce) return; + DPRINTF(Quiesce, "%s: quiesce()\n", tc->getCpuPtr()->name()); + tc->suspend(); if (tc->getKernelStats()) tc->getKernelStats()->quiesce(); @@ -87,10 +89,15 @@ namespace AlphaPseudo EndQuiesceEvent *quiesceEvent = 
tc->getQuiesceEvent(); + Tick resume = curTick + Clock::Int::ns * ns; + if (quiesceEvent->scheduled()) - quiesceEvent->reschedule(curTick + Clock::Int::ns * ns); + quiesceEvent->reschedule(resume); else - quiesceEvent->schedule(curTick + Clock::Int::ns * ns); + quiesceEvent->schedule(resume); + + DPRINTF(Quiesce, "%s: quiesceNs(%d) until %d\n", + tc->getCpuPtr()->name(), ns, resume); tc->suspend(); if (tc->getKernelStats()) @@ -105,12 +112,15 @@ namespace AlphaPseudo EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent(); + Tick resume = curTick + tc->getCpuPtr()->cycles(cycles); + if (quiesceEvent->scheduled()) - quiesceEvent->reschedule(curTick + - tc->getCpuPtr()->cycles(cycles)); + quiesceEvent->reschedule(resume); else - quiesceEvent->schedule(curTick + - tc->getCpuPtr()->cycles(cycles)); + quiesceEvent->schedule(resume); + + DPRINTF(Quiesce, "%s: quiesceCycles(%d) until %d\n", + tc->getCpuPtr()->name(), cycles, resume); tc->suspend(); if (tc->getKernelStats()) diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index edd4e331d..e79712a19 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -356,6 +356,14 @@ convertStatBuf(target_stat &tgt, host_stat *host, bool fakeTTY = false) tgt->st_dev = htog(tgt->st_dev); tgt->st_ino = host->st_ino; tgt->st_ino = htog(tgt->st_ino); + tgt->st_mode = host->st_mode; + tgt->st_mode = htog(tgt->st_mode); + tgt->st_nlink = host->st_nlink; + tgt->st_nlink = htog(tgt->st_nlink); + tgt->st_uid = host->st_uid; + tgt->st_uid = htog(tgt->st_uid); + tgt->st_gid = host->st_gid; + tgt->st_gid = htog(tgt->st_gid); if (fakeTTY) tgt->st_rdev = 0x880d; else diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini new file mode 100644 index 000000000..982973385 --- /dev/null +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini @@ -0,0 +1,236 @@ +[root] +type=Root +children=system +checkpoint= +clock=1000000000000 
+max_tick=0 +output_file=cout +progress_interval=0 + +[debug] +break_cycles= + +[exetrace] +intel_format=false +pc_symbol=true +print_cpseq=false +print_cycle=true +print_data=true +print_effaddr=true +print_fetchseq=false +print_iregs=false +print_opclass=true +print_reg_delta=false +print_thread=true +speculative=true +trace_system=client + +[serialize] +count=10 +cycle=0 +dir=cpt.%012d +period=0 + +[stats] +descriptions=true +dump_cycle=0 +dump_period=0 +dump_reset=false +ignore_events= +mysql_db= +mysql_host= +mysql_password= +mysql_user= +project_name=test +simulation_name=test +simulation_sample=0 +text_compat=true +text_file=m5stats.txt + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=TimingSimpleCPU +children=dcache icache l2cache toL2Bus workload +clock=1 +cpu_id=0 +defer_registration=false +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +mem=system.cpu.dcache +progress_interval=0 +system=system +workload=system.cpu.workload +dcache_port=system.cpu.dcache.cpu_side +icache_port=system.cpu.icache.cpu_side + +[system.cpu.dcache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=262144 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.dcache_port +mem_side=system.cpu.toL2Bus.port[1] + +[system.cpu.icache] +type=BaseCache 
+adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=131072 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.icache_port +mem_side=system.cpu.toL2Bus.port[0] + +[system.cpu.l2cache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=2097152 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.toL2Bus.port[2] +mem_side=system.membus.port[1] + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side + +[system.cpu.workload] +type=LiveProcess +cmd=hello +egid=100 +env= +euid=100 +executable=tests/test-progs/hello/bin/sparc/linux/hello +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +port=system.physmem.port system.cpu.l2cache.mem_side + +[system.physmem] 
+type=PhysicalMemory +file= +latency=1 +range=0:134217727 +port=system.membus.port[0] + +[trace] +bufsize=0 +cycle=0 +dump_on_exit=false +file=cout +flags= +ignore= +start=0 + diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out new file mode 100644 index 000000000..5210081b3 --- /dev/null +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out @@ -0,0 +1,227 @@ +[root] +type=Root +clock=1000000000000 +max_tick=0 +progress_interval=0 +output_file=cout + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.workload] +type=LiveProcess +cmd=hello +executable=tests/test-progs/hello/bin/sparc/linux/hello +input=cin +output=cout +env= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu] +type=TimingSimpleCPU +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +mem=system.cpu.dcache +system=system +cpu_id=0 +workload=system.cpu.workload +clock=1 +defer_registration=false +// width not specified 
+function_trace=false +function_trace_start=0 +// simulate_stalls not specified + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[trace] +flags= +start=0 +cycle=0 +bufsize=0 +file=cout +dump_on_exit=false +ignore= + +[stats] +descriptions=true +project_name=test +simulation_name=test +simulation_sample=0 +text_file=m5stats.txt +text_compat=true +mysql_db= +mysql_user= +mysql_password= +mysql_host= +events_start=-1 +dump_reset=false +dump_cycle=0 +dump_period=0 +ignore_events= + +[random] +seed=1 + +[exetrace] 
+speculative=true +print_cycle=true +print_opclass=true +print_thread=true +print_effaddr=true +print_data=true +print_iregs=false +print_fetchseq=false +print_cpseq=false +print_reg_delta=false +pc_symbol=true +intel_format=false +trace_system=client + +[debug] +break_cycles= + +[statsreset] +reset_cycle=0 + diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt new file mode 100644 index 000000000..c4dc22855 --- /dev/null +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt @@ -0,0 +1,214 @@ + +---------- Begin Simulation Statistics ---------- +host_inst_rate 53689 # Simulator instruction rate (inst/s) +host_mem_usage 177104 # Number of bytes of host memory used +host_seconds 0.08 # Real time elapsed on the host +host_tick_rate 17808084 # Simulator tick rate (ticks/s) +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 4483 # Number of instructions simulated +sim_seconds 0.000001 # Number of seconds simulated +sim_ticks 1497001 # Number of ticks simulated +system.cpu.dcache.ReadReq_accesses 464 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 3972.166667 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2972.166667 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 410 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 214497 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.116379 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 54 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 160497 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.116379 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 54 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 501 # number of WriteReq accesses(hits+misses) 
+system.cpu.dcache.WriteReq_avg_miss_latency 3980.840580 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2980.840580 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 432 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 274678 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.137725 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 69 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 205678 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.137725 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 69 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 6.845528 # Average number of references to valid blocks. 
+system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 965 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 3977.032520 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 2977.032520 # average overall mshr miss latency +system.cpu.dcache.demand_hits 842 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 489175 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.127461 # miss rate for demand accesses +system.cpu.dcache.demand_misses 123 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 366175 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.127461 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 123 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 965 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 3977.032520 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 2977.032520 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 842 # number of overall hits 
+system.cpu.dcache.overall_miss_latency 489175 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.127461 # miss rate for overall accesses +system.cpu.dcache.overall_misses 123 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 366175 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.127461 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 123 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 0 # number of replacements +system.cpu.dcache.sampled_refs 123 # Sample count of references to valid blocks. 
+system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 71.370810 # Cycle average of tags in use +system.cpu.dcache.total_refs 842 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.icache.ReadReq_accesses 4484 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 3979.178571 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 2979.178571 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 4232 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1002753 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.056200 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 252 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 750753 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.056200 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 252 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.icache.avg_refs 16.793651 # Average number of references to valid blocks. 
+system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 4484 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 3979.178571 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 2979.178571 # average overall mshr miss latency +system.cpu.icache.demand_hits 4232 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1002753 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.056200 # miss rate for demand accesses +system.cpu.icache.demand_misses 252 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 750753 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.056200 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 252 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 4484 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 3979.178571 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 2979.178571 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 4232 # number of overall hits 
+system.cpu.icache.overall_miss_latency 1002753 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.056200 # miss rate for overall accesses +system.cpu.icache.overall_misses 252 # number of overall misses +system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 750753 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.056200 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 252 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 0 # number of replacements +system.cpu.icache.sampled_refs 252 # Sample count of references to valid blocks. 
+system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 115.914677 # Cycle average of tags in use +system.cpu.icache.total_refs 4232 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.idle_fraction 0 # Percentage of idle cycles +system.cpu.l2cache.ReadReq_accesses 375 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 2986.473118 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1985.473118 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 3 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 1110968 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.992000 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 372 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 738596 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.992000 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 372 # number of ReadReq MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 0.008065 # Average number of references to valid blocks. 
+system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 375 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 2986.473118 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 1985.473118 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 3 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 1110968 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.992000 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 372 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 738596 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.992000 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 372 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 375 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 2986.473118 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 1985.473118 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no value # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 3 # number of overall 
hits +system.cpu.l2cache.overall_miss_latency 1110968 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.992000 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 372 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 738596 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.992000 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 372 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.sampled_refs 372 # Sample count of references to valid blocks. 
+system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.l2cache.tagsinuse 185.896040 # Cycle average of tags in use +system.cpu.l2cache.total_refs 3 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.l2cache.writebacks 0 # number of writebacks +system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles +system.cpu.numCycles 1497001 # number of cpu cycles simulated +system.cpu.num_insts 4483 # Number of instructions executed +system.cpu.num_refs 965 # Number of memory references +system.cpu.workload.PROG:num_syscalls 11 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stderr b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stderr new file mode 100644 index 000000000..7873672f2 --- /dev/null +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stderr @@ -0,0 +1,3 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +warn: Entering event queue @ 0. Starting simulation... 
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout new file mode 100644 index 000000000..3c17ee40b --- /dev/null +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout @@ -0,0 +1,12 @@ +Hello World!M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Oct 23 2006 07:47:36 +M5 started Mon Oct 23 07:47:41 2006 +M5 executing on zeep +command line: build/SPARC_SE/m5.debug -d build/SPARC_SE/tests/debug/quick/00.hello/sparc/linux/simple-timing tests/run.py quick/00.hello/sparc/linux/simple-timing +Exiting @ tick 1497001 because target called exit() diff --git a/util/statetrace/arch/tracechild_sparc.cc b/util/statetrace/arch/tracechild_sparc.cc index 378de0865..bad81b647 100644 --- a/util/statetrace/arch/tracechild_sparc.cc +++ b/util/statetrace/arch/tracechild_sparc.cc @@ -47,10 +47,10 @@ string SparcTraceChild::regNames[numregs] = { //Input registers "i0", "i1", "i2", "i3", "i4", "i5", "i6", "i7", //Floating point - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", - "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", - "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", - "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14", + "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30", + "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", + "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", //Miscelaneous "fsr", "fprs", "pc", "npc", "y", "cwp", "pstate", "asi", "ccr"}; @@ -98,37 +98,37 @@ int64_t getRegs(regs & myregs, fpu & myfpu, case SparcTraceChild::I7: return inputs[7]; //Floating point case SparcTraceChild::F0: return myfpu.f_fpstatus.fpu_fr[0]; - case SparcTraceChild::F1: return myfpu.f_fpstatus.fpu_fr[1]; - case SparcTraceChild::F2: return myfpu.f_fpstatus.fpu_fr[2]; - case SparcTraceChild::F3: return myfpu.f_fpstatus.fpu_fr[3]; - case 
SparcTraceChild::F4: return myfpu.f_fpstatus.fpu_fr[4]; - case SparcTraceChild::F5: return myfpu.f_fpstatus.fpu_fr[5]; - case SparcTraceChild::F6: return myfpu.f_fpstatus.fpu_fr[6]; - case SparcTraceChild::F7: return myfpu.f_fpstatus.fpu_fr[7]; - case SparcTraceChild::F8: return myfpu.f_fpstatus.fpu_fr[8]; - case SparcTraceChild::F9: return myfpu.f_fpstatus.fpu_fr[9]; - case SparcTraceChild::F10: return myfpu.f_fpstatus.fpu_fr[10]; - case SparcTraceChild::F11: return myfpu.f_fpstatus.fpu_fr[11]; - case SparcTraceChild::F12: return myfpu.f_fpstatus.fpu_fr[12]; - case SparcTraceChild::F13: return myfpu.f_fpstatus.fpu_fr[13]; - case SparcTraceChild::F14: return myfpu.f_fpstatus.fpu_fr[14]; - case SparcTraceChild::F15: return myfpu.f_fpstatus.fpu_fr[15]; - case SparcTraceChild::F16: return myfpu.f_fpstatus.fpu_fr[16]; - case SparcTraceChild::F17: return myfpu.f_fpstatus.fpu_fr[17]; - case SparcTraceChild::F18: return myfpu.f_fpstatus.fpu_fr[18]; - case SparcTraceChild::F19: return myfpu.f_fpstatus.fpu_fr[19]; - case SparcTraceChild::F20: return myfpu.f_fpstatus.fpu_fr[20]; - case SparcTraceChild::F21: return myfpu.f_fpstatus.fpu_fr[21]; - case SparcTraceChild::F22: return myfpu.f_fpstatus.fpu_fr[22]; - case SparcTraceChild::F23: return myfpu.f_fpstatus.fpu_fr[23]; - case SparcTraceChild::F24: return myfpu.f_fpstatus.fpu_fr[24]; - case SparcTraceChild::F25: return myfpu.f_fpstatus.fpu_fr[25]; - case SparcTraceChild::F26: return myfpu.f_fpstatus.fpu_fr[26]; - case SparcTraceChild::F27: return myfpu.f_fpstatus.fpu_fr[27]; - case SparcTraceChild::F28: return myfpu.f_fpstatus.fpu_fr[28]; - case SparcTraceChild::F29: return myfpu.f_fpstatus.fpu_fr[29]; - case SparcTraceChild::F30: return myfpu.f_fpstatus.fpu_fr[30]; - case SparcTraceChild::F31: return myfpu.f_fpstatus.fpu_fr[31]; + case SparcTraceChild::F2: return myfpu.f_fpstatus.fpu_fr[1]; + case SparcTraceChild::F4: return myfpu.f_fpstatus.fpu_fr[2]; + case SparcTraceChild::F6: return myfpu.f_fpstatus.fpu_fr[3]; + case 
SparcTraceChild::F8: return myfpu.f_fpstatus.fpu_fr[4]; + case SparcTraceChild::F10: return myfpu.f_fpstatus.fpu_fr[5]; + case SparcTraceChild::F12: return myfpu.f_fpstatus.fpu_fr[6]; + case SparcTraceChild::F14: return myfpu.f_fpstatus.fpu_fr[7]; + case SparcTraceChild::F16: return myfpu.f_fpstatus.fpu_fr[8]; + case SparcTraceChild::F18: return myfpu.f_fpstatus.fpu_fr[9]; + case SparcTraceChild::F20: return myfpu.f_fpstatus.fpu_fr[10]; + case SparcTraceChild::F22: return myfpu.f_fpstatus.fpu_fr[11]; + case SparcTraceChild::F24: return myfpu.f_fpstatus.fpu_fr[12]; + case SparcTraceChild::F26: return myfpu.f_fpstatus.fpu_fr[13]; + case SparcTraceChild::F28: return myfpu.f_fpstatus.fpu_fr[14]; + case SparcTraceChild::F30: return myfpu.f_fpstatus.fpu_fr[15]; + case SparcTraceChild::F32: return myfpu.f_fpstatus.fpu_fr[16]; + case SparcTraceChild::F34: return myfpu.f_fpstatus.fpu_fr[17]; + case SparcTraceChild::F36: return myfpu.f_fpstatus.fpu_fr[18]; + case SparcTraceChild::F38: return myfpu.f_fpstatus.fpu_fr[19]; + case SparcTraceChild::F40: return myfpu.f_fpstatus.fpu_fr[20]; + case SparcTraceChild::F42: return myfpu.f_fpstatus.fpu_fr[21]; + case SparcTraceChild::F44: return myfpu.f_fpstatus.fpu_fr[22]; + case SparcTraceChild::F46: return myfpu.f_fpstatus.fpu_fr[23]; + case SparcTraceChild::F48: return myfpu.f_fpstatus.fpu_fr[24]; + case SparcTraceChild::F50: return myfpu.f_fpstatus.fpu_fr[25]; + case SparcTraceChild::F52: return myfpu.f_fpstatus.fpu_fr[26]; + case SparcTraceChild::F54: return myfpu.f_fpstatus.fpu_fr[27]; + case SparcTraceChild::F56: return myfpu.f_fpstatus.fpu_fr[28]; + case SparcTraceChild::F58: return myfpu.f_fpstatus.fpu_fr[29]; + case SparcTraceChild::F60: return myfpu.f_fpstatus.fpu_fr[30]; + case SparcTraceChild::F62: return myfpu.f_fpstatus.fpu_fr[31]; //Miscelaneous case SparcTraceChild::FSR: return myfpu.f_fpstatus.Fpu_fsr; case SparcTraceChild::FPRS: return myregs.r_fprs; @@ -188,46 +188,110 @@ bool SparcTraceChild::step() //being 
breakpointed should be word (64bit) aligned, and that both the //next instruction and the instruction after that need to be breakpointed //so that annulled branches will still stop as well. + + /* + * Useful constants + */ const static uint64_t breakInst = 0x91d02001; const static uint64_t breakWord = breakInst | (breakInst << 32); - const static uint64_t lowMask = (uint64_t)(0xFFFFFFFF); + const static uint64_t lowMask = 0xFFFFFFFFULL; const static uint64_t highMask = lowMask << 32; + + /* + * storage for the original contents of the child process's memory + */ uint64_t originalInst, originalAnnulInst; + + /* + * Get information about where the process is and is headed next. + */ + uint64_t currentPC = getRegVal(PC); + bool unalignedPC = currentPC & 7; + uint64_t alignedPC = currentPC & (~7); uint64_t nextPC = getRegVal(NPC); - bool unaligned = nextPC & 7; - uint64_t alignedPC = nextPC & (~7); - originalInst = ptrace(PTRACE_PEEKTEXT, pid, alignedPC, 0); - if(unaligned) + bool unalignedNPC = nextPC & 7; + uint64_t alignedNPC = nextPC & (~7); + + /* + * Store the original contents of the child process's memory + */ + originalInst = ptrace(PTRACE_PEEKTEXT, pid, alignedNPC, 0); + //Save a ptrace call if we can + if(unalignedNPC) { - originalAnnulInst = ptrace(PTRACE_PEEKTEXT, pid, alignedPC+8, 0); + originalAnnulInst = ptrace(PTRACE_PEEKTEXT, pid, alignedNPC+8, 0); } - uint64_t newInst; - if(unaligned) + + /* + * Prepare breakpointed copies of child processes memory + */ + uint64_t newInst, newAnnulInst; + //If the current instruction is in the same word as the npc + if(alignedPC == alignedNPC) { - newInst = (originalInst & highMask) | (breakInst << 0); - if(ptrace(PTRACE_POKETEXT, pid, alignedPC, newInst) != 0) - cerr << "Poke failed" << endl; - newInst = (originalAnnulInst & lowMask) | (breakInst << 32); - if(ptrace(PTRACE_POKETEXT, pid, alignedPC+8, newInst) != 0) - cerr << "Poke failed" << endl; + //Make sure we only replace the other part + if(unalignedPC) + 
newInst = (originalInst & lowMask) | (breakWord & highMask); + else + newInst = (originalInst & highMask) | (breakWord & lowMask); + } + else + { + //otherwise replace the whole thing + newInst = breakWord; + } + //If the current instruction is in the same word as the word after + //the npc + if(alignedPC == alignedNPC+8) + { + //Make sure we only replace the other part + if(unalignedPC) + newAnnulInst = (originalAnnulInst & lowMask) | (breakWord & highMask); + else + newAnnulInst = (originalAnnulInst & highMask) | (breakWord & lowMask); } else { - if(ptrace(PTRACE_POKETEXT, pid, alignedPC, breakWord) != 0) + //otherwise replace the whole thing + newAnnulInst = breakWord; + } + + /* + * Stuff the breakpoint instructions into the child's address space. + */ + //Replace the word at npc + if(ptrace(PTRACE_POKETEXT, pid, alignedNPC, newInst) != 0) + cerr << "Poke failed" << endl; + //Replace the next word, if necessary + if(unalignedNPC) + { + if(ptrace(PTRACE_POKETEXT, pid, alignedNPC+8, newAnnulInst) != 0) cerr << "Poke failed" << endl; } + + /* + * Restart the child process + */ //Note that the "addr" parameter is supposed to be ignored, but in at //least one version of the kernel, it must be 1 or it will set what //pc to continue from - if(ptrace(PTRACE_CONT, pid, /*nextPC - 4*/ 1, 0) != 0) + if(ptrace(PTRACE_CONT, pid, 1, 0) != 0) cerr << "Cont failed" << endl; doWait(); + + /* + * Update our record of the child's state + */ update(pid); - if(ptrace(PTRACE_POKETEXT, pid, alignedPC, originalInst) != 0) + + /* + * Put back the original contents of the childs address space + */ + if(ptrace(PTRACE_POKETEXT, pid, alignedNPC, originalInst) != 0) cerr << "Repoke failed" << endl; - if(unaligned) + if(unalignedNPC) { - if(ptrace(PTRACE_POKETEXT, pid, alignedPC+8, originalAnnulInst) != 0) + if(ptrace(PTRACE_POKETEXT, pid, alignedNPC+8, originalAnnulInst) != 0) cerr << "Repoke failed" << endl; } return true; diff --git a/util/statetrace/arch/tracechild_sparc.hh 
b/util/statetrace/arch/tracechild_sparc.hh index d177d5941..80770211a 100644 --- a/util/statetrace/arch/tracechild_sparc.hh +++ b/util/statetrace/arch/tracechild_sparc.hh @@ -57,10 +57,10 @@ public: //Input registers I0, I1, I2, I3, I4, I5, I6, I7, //Floating point - F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, - F16, F17, F18, F19, F20, F21, F22, F23, - F24, F25, F26, F27, F28, F29, F30, F31, + F0, F2, F4, F6, F8, F10, F12, F14, + F16, F18, F20, F22, F24, F26, F28, F30, + F32, F34, F36, F38, F40, F42, F44, F46, + F48, F50, F52, F54, F56, F58, F60, F62, //Miscelaneous FSR, FPRS, PC, NPC, Y, CWP, PSTATE, ASI, CCR, numregs diff --git a/util/tracediff b/util/tracediff index f2377a999..b25efe9b2 100755 --- a/util/tracediff +++ b/util/tracediff @@ -33,23 +33,63 @@ # ******Note that you need to enable some trace flags in the args in order # to do anything useful!****** # -# If you want to pass different arguments to the two instances of m5, -# you can embed them in the simulator arguments like this: +# Script arguments are handled uniformly as follows: +# - If the argument does not contain a '|' character, it is appended +# to both command lines. +# - If the argument has a '|' character in it, the text on either side +# of the '|' is appended to the respective command lines. Note that +# you'll have to quote the arg or escape the '|' with a backslash +# so that the shell doesn't think you're doing a pipe. # -# % tracediff "m5.opt --option1" "m5.opt --option2" [common args] +# In other words, the arguments should look like the command line you +# want to run, with "|" used to list the alternatives for the parts +# that you want to differ between the two runs. 
+# +# For example: +# +# % tracediff m5.opt --opt1 "--opt2|--opt3" --opt4 +# would compare these two runs: +# m5.opt --opt1 --opt2 --opt4 +# m5.opt --opt1 --opt3 --opt4 +# +# If you want to compare two different simulator binaries, put a '|' +# in the first script argument ("path1/m5.opt|path2/m5.opt"). If you +# want to add arguments to one run only, just put a '|' in with text +# only on one side ("--onlyOn1|"). You can do this with multiple +# arguments together too ("|-a -b -c" adds three args to the second +# run only). # if (@ARGV < 2) { - die "Usage: tracediff sim1 sim2 [--root.trace.flags=X args...]\n"; + die "Usage: tracediff \"sim1|sim2\" [common-arg \"arg1|arg2\" ...]\n"; +} + +foreach $arg (@ARGV) { + @pair = split('\|', $arg, -1); # -1 enables null trailing fields + if ($#pair > 0) { + push @cmd1, $pair[0]; + push @cmd2, $pair[1]; + } else { + push @cmd1, $arg; + push @cmd2, $arg; + } } # First two args are the two simulator binaries to compare -$sim1 = shift; -$sim2 = shift; +$sim1 = shift @cmd1; +$sim2 = shift @cmd2; + +# Everything else is a simulator arg. +$args1 = join(' ', @cmd1); +$args2 = join(' ', @cmd2); -# Everything else on the command line is taken to be an m5 argument to -# be given to both invocations -$simargs = '"' . join('" "', @ARGV) . '"'; +# Common mistake: if you don't set any traceflags this often isn't +# doing what you want. +if ($args1 !~ /--trace-flags/) { + print "****\n"; + print "**** WARNING: no trace flags set... you may not be diffing much!\n"; + print "****\n"; +} # Run individual invocations in separate dirs so output and intermediate # files (particularly config.py and config.ini) don't conflict. 
@@ -58,8 +98,8 @@ $dir2 = "tracediff-$$-2"; mkdir($dir1) or die "Can't create dir $dir1\n"; mkdir($dir2) or die "Can't create dir $dir2\n"; -$cmd1 = "$sim1 -d $dir1 $simargs 2>&1 |"; -$cmd2 = "$sim2 -d $dir2 $simargs 2>&1 |"; +$cmd1 = "$sim1 -d $dir1 $args1 2>&1 |"; +$cmd2 = "$sim2 -d $dir2 $args2 2>&1 |"; # This only works if you have rundiff in your path. I just edit it # with an explicit path if necessary. |