diff options
290 files changed, 22055 insertions, 5773 deletions
diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 000000000..ec3de7bb2 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,42 @@ +Steven K. Reinhardt +----------------------- + +Nathan L. Binkert +----------------------- + +Erik G. Hallnor +----------------------- + +Steve E. Raasch +----------------------- + +Lisa R. Hsu +----------------------- + +Ali G. Saidi +----------------------- + +Andrew L. Schultz +----------------------- + +Kevin T. Lim +----------------------- + +Ronald G. Dreslinski Jr +----------------------- + +Gabriel Black +----------------------- + +Korey L. Sewell +----------------------- + +David Green +----------------------- + +Benjamin S. Nash +----------------------- + +Miguel J. Serrano +----------------------- + @@ -1,21 +1,27 @@ -This is release m5_1.1 of the M5 simulator. +This is release 2.0 of the M5 simulator. -This file contains brief "getting started" instructions. For more -information, see http://m5.eecs.umich.edu. If you have questions, -please send mail to m5sim-users@lists.sourceforge.net. +For information about building the simulator and getting started please refer +to: http://m5.eecs.umich.edu/ + +Specific Pages of Interest are: +http://m5.eecs.umich.edu/wiki/index.php/Compiling_M5 +http://m5.eecs.umich.edu/wiki/index.php/Running_M5 + +If you have questions, please send mail to m5sim-users@lists.sourceforge.net. WHAT'S INCLUDED (AND NOT) ------------------------- The basic source release includes these subdirectories: - - m5: the simulator itself - - m5-test: regression tests - - ext: less-common external packages needed to build m5 - - alpha-system: source for Alpha console and PALcode + - m5: + - src: source code of the m5 simulator + - test: regression tests + - ext: less-common external packages needed to build m5 + - system/alpha: source for Alpha console and PALcode To run full-system simulations, you will need compiled console, PALcode, and kernel binaries and one or more disk images. These files -are collected in a separate archive, m5_system_1.1.tar.bz2. This file +are collected in a separate archive, m5_system_2.0.tar.bz2. This file is included on the CD release, or you can download it separately from Sourceforge. @@ -31,66 +37,8 @@ set of Linux source patches (linux_m5-2.6.8.1.diff), and the scons program needed to build M5. If you do not have the CD, the same HTML documentation is available online at http://m5.eecs.umich.edu/docs, the Linux source patches are available at -http://m5.eecs.umich.edu/dist/linux_m5-2.6.8.1.diff, and the scons -program is available from http://www.scons.org. - -WHAT'S NEEDED -------------- -- GCC version 3.3 or newer -- Python 2.3 or newer -- SCons 0.96.1 or newer (see http://www.scons.org) - -WHAT'S RECOMMENDED ------------------- -- MySQL (for statistics complex statistics storage/retrieval) -- Python-MysqlDB (for statistics analysis) - -GETTING STARTED ---------------- - -There are two different build targets and three optimizations levels: - -Target: -------- -ALPHA_SE - Syscall emulation simulation -ALPHA_FS - Full system simulation - -Optimization: -------------- -m5.debug - debug version of the code with tracing and without optimization -m5.opt - optimized version of code with tracing -m5.fast - optimized version of the code without tracing and asserts - -Different targets are built in different subdirectories of m5/build. -Binaries with the same target but different optimization levels share -the same directory. Note that you can build m5 in any directory you -choose;p just configure the target directory using the 'mkbuilddir' -script in m5/build. - -The following steps will build and test the simulator. The variable -"$top" refers to the top directory where you've unpacked the files, -i.e., the one containing the m5, m5-test, and ext directories. If you -have a multiprocessor system, you should give scons a "-j N" argument (like -make) to run N jobs in parallel. - -To build and test the syscall-emulation simulator: - - cd $top/m5/build - scons ALPHA_SE/test/opt/quick - -This process takes under 10 minutes on a dual 3GHz Xeon system (using -the '-j 4' option). - -To build and test the full-system simulator: - -1. Unpack the full-system binaries from m5_system_1.1.tar.bz2. (See - above for directions on obtaining this file if you don't have it.) - This package includes disk images and kernel, palcode, and console - binaries for Linux and FreeBSD. -2. Edit the SYSTEMDIR search path in $top/m5-test/SysPaths.py to - include the path to your local copy of the binaries. -3. In $top/m5/build, run "scons ALPHA_FS/test/opt/quick". +http://m5.eecs.umich.edu/dist/linux_m5-2.6.8.1.diff, the scons +program is available from http://www.scons.org, and swig is available from +http://www.swig.org. -This process also takes under 10 minutes on a dual 3GHz Xeon system -(again using the '-j 4' option). diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 983c9b2e9..6eb9b1844 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,3 +1,11 @@ +XXX. X, 2006: m5_2.0 +-------------------- +Major update to M5 including: +- New CPU model +- Sew memory system +- More extensive python integration +- Preliminary syscall emulation support for MIPS and SPARC + Oct. 8, 2005: m5_1.1 -------------------- Update release for IOSCA workshop mini-tutorial. New features include: diff --git a/SConstruct b/SConstruct index 0cf15b1f9..b18fe66d3 100644 --- a/SConstruct +++ b/SConstruct @@ -39,17 +39,20 @@ # # You can build M5 in a different directory as long as there is a # 'build/<CONFIG>' somewhere along the target path. The build system -# expdects that all configs under the same build directory are being +# expects that all configs under the same build directory are being # built for the same host system. # # Examples: -# These two commands are equivalent. The '-u' option tells scons to -# search up the directory tree for this SConstruct file. +# +# The following two commands are equivalent. The '-u' option tells +# scons to search up the directory tree for this SConstruct file. # % cd <path-to-src>/m5 ; scons build/ALPHA_FS/m5.debug # % cd <path-to-src>/m5/build/ALPHA_FS; scons -u m5.debug -# These two commands are equivalent and demonstrate building in a -# directory outside of the source tree. The '-C' option tells scons -# to chdir to the specified directory to find this SConstruct file. +# +# The following two commands are equivalent and demonstrate building +# in a directory outside of the source tree. The '-C' option tells +# scons to chdir to the specified directory to find this SConstruct +# file. # % cd <path-to-src>/m5 ; scons /local/foo/build/ALPHA_FS/m5.debug # % cd /local/foo/build/ALPHA_FS; scons -C <path-to-src>/m5 m5.debug # @@ -156,7 +159,7 @@ env = Environment(ENV = os.environ, # inherit user's environment vars ROOT = ROOT, SRCDIR = SRCDIR) -env.SConsignFile("sconsign") +env.SConsignFile(os.path.join(build_root,"sconsign")) # Default duplicate option is to use hard links, but this messes up # when you use emacs to edit a file in the target dir, as emacs moves @@ -260,8 +263,8 @@ env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips'] # Define the universe of supported CPU models env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU', - 'FullCPU', 'AlphaFullCPU', - 'OzoneSimpleCPU', 'OzoneCPU', 'CheckerCPU'] + 'FullCPU', 'O3CPU', + 'OzoneCPU'] # Sticky options get saved in the options file so they persist from # one invocation to the next (unless overridden, in which case the new @@ -289,6 +292,7 @@ sticky_opts.AddOptions( False), BoolOption('USE_MYSQL', 'Use MySQL for stats output', have_mysql), BoolOption('USE_FENV', 'Use <fenv.h> IEEE mode control', have_fenv), + BoolOption('USE_CHECKER', 'Use checker for detailed CPU models', False), ('CC', 'C compiler', os.environ.get('CC', env['CC'])), ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])), BoolOption('BATCH', 'Use batch pool for build and tests', False), @@ -301,9 +305,10 @@ nonsticky_opts.AddOptions( BoolOption('update_ref', 'Update test reference outputs', False) ) -# These options get exported to #defines in config/*.hh (see m5/SConscript). +# These options get exported to #defines in config/*.hh (see src/SConscript). env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \ - 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP'] + 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \ + 'USE_CHECKER'] # Define a handy 'no-op' action def no_action(target, source, env): @@ -482,7 +487,7 @@ for build_path in build_paths: if env['USE_SSE2']: env.Append(CCFLAGS='-msse2') - # The m5/SConscript file sets up the build rules in 'env' according + # The src/SConscript file sets up the build rules in 'env' according # to the configured options. It returns a list of environments, # one for each variant build (debug, opt, etc.) envList = SConscript('src/SConscript', build_dir = build_path, diff --git a/configs/test/SysPaths.py b/configs/test/SysPaths.py index c7c7db4e7..3f96a546f 100644 --- a/configs/test/SysPaths.py +++ b/configs/test/SysPaths.py @@ -1,32 +1,40 @@ -from m5 import * - -import os.path -import sys - -# Edit the following list to include the possible paths to the binary -# and disk image directories. The first directory on the list that -# exists will be selected. -SYSTEMDIR_PATH = ['/n/poolfs/z/dist/m5/system'] - -SYSTEMDIR = None -for d in SYSTEMDIR_PATH: - if os.path.exists(d): - SYSTEMDIR = d - break - -if not SYSTEMDIR: - print >>sys.stderr, "Can't find a path to system files." - sys.exit(1) - -BINDIR = SYSTEMDIR + '/binaries' -DISKDIR = SYSTEMDIR + '/disks' +import os, sys +from os.path import isdir, join as joinpath +from os import environ as env def disk(file): - return os.path.join(DISKDIR, file) + system() + return joinpath(disk.dir, file) def binary(file): - return os.path.join(BINDIR, file) + system() + return joinpath(binary.dir, file) def script(file): - return os.path.join(SYSTEMDIR, 'boot', file) - + system() + return joinpath(script.dir, file) + +def system(): + if not system.dir: + try: + path = env['M5_PATH'].split(':') + except KeyError: + path = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ] + + for system.dir in path: + if os.path.isdir(system.dir): + break + else: + raise ImportError, "Can't find a path to system files." + + if not binary.dir: + binary.dir = joinpath(system.dir, 'binaries') + if not disk.dir: + disk.dir = joinpath(system.dir, 'disks') + if not script.dir: + script.dir = joinpath(system.dir, 'boot') + +system.dir = None +binary.dir = None +disk.dir = None +script.dir = None diff --git a/configs/test/fs.py b/configs/test/fs.py index 55e7003a4..41c3f8cc0 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -1,9 +1,10 @@ +import optparse, os, sys + import m5 from m5.objects import * -import os from SysPaths import * -parser = optparse.OptionParser(option_list=m5.standardOptions) +parser = optparse.OptionParser() parser.add_option("-t", "--timing", action="store_true") @@ -16,6 +17,8 @@ if args: # Base for tests is directory containing this file. test_base = os.path.dirname(__file__) +script.dir = '/z/saidi/work/m5.newmem/configs/boot' + linux_image = env.get('LINUX_IMAGE', disk('linux-latest.img')) class IdeControllerPciData(PciConfigData): @@ -97,7 +100,7 @@ class SpecwebFilesetDisk(IdeDisk): class BaseTsunami(Tsunami): cchip = TsunamiCChip(pio_addr=0x801a0000000) pchip = TsunamiPChip(pio_addr=0x80180000000) - pciconfig = PciConfigAll(pio_addr=0x801fe000000) + pciconfig = PciConfigAll() fake_sm_chip = IsaFake(pio_addr=0x801fc000370) fake_uart1 = IsaFake(pio_addr=0x801fc0002f8) @@ -129,17 +132,7 @@ class BaseTsunami(Tsunami): ethernet = NSGigE(configdata=NSGigEPciData(), pci_bus=0, pci_dev=1, pci_func=0) etherint = NSGigEInt(device=Parent.ethernet) -# ethernet = Sinic(configdata=SinicPciData(), -# pci_bus=0, pci_dev=1, pci_func=0) -# etherint = SinicInt(device=Parent.ethernet) console = AlphaConsole(pio_addr=0x80200000000, disk=Parent.simple_disk) -# bridge = PciFake(configdata=BridgePciData(), pci_bus=0, pci_dev=2, pci_func=0) - -#class FreeBSDTsunami(BaseTsunami): -# disk0 = FreeBSDRootDisk(delay='0us', driveID='master') -# ide = IdeController(disks=[Parent.disk0], -# configdata=IdeControllerPciData(), -# pci_func=0, pci_dev=0, pci_bus=0) class LinuxTsunami(BaseTsunami): disk0 = LinuxRootDisk(driveID='master') @@ -149,56 +142,62 @@ class LinuxTsunami(BaseTsunami): configdata=IdeControllerPciData(), pci_func=0, pci_dev=0, pci_bus=0) -class LinuxAlphaSystem(LinuxAlphaSystem): +class MyLinuxAlphaSystem(LinuxAlphaSystem): magicbus = Bus(bus_id=0) magicbus2 = Bus(bus_id=1) bridge = Bridge() physmem = PhysicalMemory(range = AddrRange('128MB')) - c0a = Connector(side_a=Parent.magicbus, side_b=Parent.bridge, side_b_name="side_a") - c0b = Connector(side_a=Parent.magicbus2, side_b=Parent.bridge, side_b_name="side_b") - c1 = Connector(side_a=Parent.physmem, side_b=Parent.magicbus2) + bridge.side_a = magicbus.port + bridge.side_b = magicbus2.port + physmem.port = magicbus2.port tsunami = LinuxTsunami() - c2 = Connector(side_a=Parent.tsunami.cchip, side_a_name='pio', side_b=Parent.magicbus) - c3 = Connector(side_a=Parent.tsunami.pchip, side_a_name='pio', side_b=Parent.magicbus) - c4 = Connector(side_a=Parent.tsunami.pciconfig, side_a_name='pio', side_b=Parent.magicbus) - c5 = Connector(side_a=Parent.tsunami.fake_sm_chip, side_a_name='pio', side_b=Parent.magicbus) - c6 = Connector(side_a=Parent.tsunami.ethernet, side_a_name='pio', side_b=Parent.magicbus) - c6a = Connector(side_a=Parent.tsunami.ethernet, side_a_name='dma', side_b=Parent.magicbus) - c7 = Connector(side_a=Parent.tsunami.fake_uart1, side_a_name='pio', side_b=Parent.magicbus) - c8 = Connector(side_a=Parent.tsunami.fake_uart2, side_a_name='pio', side_b=Parent.magicbus) - c9 = Connector(side_a=Parent.tsunami.fake_uart3, side_a_name='pio', side_b=Parent.magicbus) - c10 = Connector(side_a=Parent.tsunami.fake_uart4, side_a_name='pio', side_b=Parent.magicbus) - c11 = Connector(side_a=Parent.tsunami.ide, side_a_name='pio', side_b=Parent.magicbus) - c13 = Connector(side_a=Parent.tsunami.ide, side_a_name='dma', side_b=Parent.magicbus) - c12 = Connector(side_a=Parent.tsunami.fake_ppc, side_a_name='pio', side_b=Parent.magicbus) - c14 = Connector(side_a=Parent.tsunami.fake_OROM, side_a_name='pio', side_b=Parent.magicbus) - c16 = Connector(side_a=Parent.tsunami.fake_pnp_addr, side_a_name='pio', side_b=Parent.magicbus) - c17 = Connector(side_a=Parent.tsunami.fake_pnp_write, side_a_name='pio', side_b=Parent.magicbus) - c18 = Connector(side_a=Parent.tsunami.fake_pnp_read0, side_a_name='pio', side_b=Parent.magicbus) - c19 = Connector(side_a=Parent.tsunami.fake_pnp_read1, side_a_name='pio', side_b=Parent.magicbus) - c20 = Connector(side_a=Parent.tsunami.fake_pnp_read2, side_a_name='pio', side_b=Parent.magicbus) - c21 = Connector(side_a=Parent.tsunami.fake_pnp_read3, side_a_name='pio', side_b=Parent.magicbus) - c22 = Connector(side_a=Parent.tsunami.fake_pnp_read4, side_a_name='pio', side_b=Parent.magicbus) - c23 = Connector(side_a=Parent.tsunami.fake_pnp_read5, side_a_name='pio', side_b=Parent.magicbus) - c24 = Connector(side_a=Parent.tsunami.fake_pnp_read6, side_a_name='pio', side_b=Parent.magicbus) - c25 = Connector(side_a=Parent.tsunami.fake_pnp_read7, side_a_name='pio', side_b=Parent.magicbus) - c27 = Connector(side_a=Parent.tsunami.fake_ata0, side_a_name='pio', side_b=Parent.magicbus) - c28 = Connector(side_a=Parent.tsunami.fake_ata1, side_a_name='pio', side_b=Parent.magicbus) - c30 = Connector(side_a=Parent.tsunami.fb, side_a_name='pio', side_b=Parent.magicbus) - c31 = Connector(side_a=Parent.tsunami.io, side_a_name='pio', side_b=Parent.magicbus) - c32 = Connector(side_a=Parent.tsunami.uart, side_a_name='pio', side_b=Parent.magicbus) - c33 = Connector(side_a=Parent.tsunami.console, side_a_name='pio', side_b=Parent.magicbus) + tsunami.cchip.pio = magicbus.port + tsunami.pchip.pio = magicbus.port + tsunami.pciconfig.pio = magicbus.default + tsunami.fake_sm_chip.pio = magicbus.port + tsunami.ethernet.pio = magicbus.port + tsunami.ethernet.dma = magicbus.port + tsunami.ethernet.config = magicbus.port + tsunami.fake_uart1.pio = magicbus.port + tsunami.fake_uart2.pio = magicbus.port + tsunami.fake_uart3.pio = magicbus.port + tsunami.fake_uart4.pio = magicbus.port + tsunami.ide.pio = magicbus.port + tsunami.ide.dma = magicbus.port + tsunami.ide.config = magicbus.port + tsunami.fake_ppc.pio = magicbus.port + tsunami.fake_OROM.pio = magicbus.port + tsunami.fake_pnp_addr.pio = magicbus.port + tsunami.fake_pnp_write.pio = magicbus.port + tsunami.fake_pnp_read0.pio = magicbus.port + tsunami.fake_pnp_read1.pio = magicbus.port + tsunami.fake_pnp_read2.pio = magicbus.port + tsunami.fake_pnp_read3.pio = magicbus.port + tsunami.fake_pnp_read4.pio = magicbus.port + tsunami.fake_pnp_read5.pio = magicbus.port + tsunami.fake_pnp_read6.pio = magicbus.port + tsunami.fake_pnp_read7.pio = magicbus.port + tsunami.fake_ata0.pio = magicbus.port + tsunami.fake_ata1.pio = magicbus.port + tsunami.fb.pio = magicbus.port + tsunami.io.pio = magicbus.port + tsunami.uart.pio = magicbus.port + tsunami.console.pio = magicbus.port raw_image = RawDiskImage(image_file=disk('linux-latest.img'), read_only=True) simple_disk = SimpleDisk(disk=Parent.raw_image) intrctrl = IntrControl() if options.timing: cpu = TimingSimpleCPU() + mem_mode = 'timing' else: cpu = AtomicSimpleCPU() - cpu.mem = Parent.magicbus2 + cpu.mem = magicbus2 + cpu.icache_port = magicbus2.port + cpu.dcache_port = magicbus2.port cpu.itb = AlphaITB() cpu.dtb = AlphaDTB() + cpu.clock = '2GHz' sim_console = SimConsole(listener=ConsoleListener(port=3456)) kernel = binary('vmlinux') pal = binary('ts_osfpal') @@ -221,14 +220,23 @@ def DualRoot(clientSystem, serverSystem): self.etherlink = EtherLink(int1 = Parent.client.tsunami.etherint[0], int2 = Parent.server.tsunami.etherint[0], dump = Parent.etherdump) - self.clock = '5GHz' + self.clock = '1THz' return self -root = DualRoot(LinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')), - LinuxAlphaSystem(readfile=script('netperf-server.rcS'))) +root = DualRoot( + MyLinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')), + MyLinuxAlphaSystem(readfile=script('netperf-server.rcS'))) m5.instantiate(root) +#exit_event = m5.simulate(2600000000000) +#if exit_event.getCause() != "user interrupt received": +# m5.checkpoint(root, 'cpt') +# exit_event = m5.simulate(300000000000) +# if exit_event.getCause() != "user interrupt received": +# m5.checkpoint(root, 'cptA') + + exit_event = m5.simulate() -print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() diff --git a/configs/test/test.py b/configs/test/test.py index 8c5b06e6a..feb44e2d1 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -4,14 +4,27 @@ # MIPS: "m5 test.py -a Mips -c hello_mips" import os, optparse, sys + import m5 from m5.objects import * +from FullO3Config import * # parse command-line arguments -parser = optparse.OptionParser(option_list=m5.standardOptions) +parser = optparse.OptionParser() -parser.add_option("-c", "--cmd", default="hello") -parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-c", "--cmd", default="hello", + help="The binary to run in syscall emulation mode.") +parser.add_option("-o", "--options", default="", + help="The options to pass to the binary, use \" \" around the entire\ + string.") +parser.add_option("-i", "--input", default="", + help="A file of input to give to the binary.") +parser.add_option("-t", "--timing", action="store_true", + help="Use simple timing CPU.") +parser.add_option("-d", "--detailed", action="store_true", + help="Use detailed CPU.") +parser.add_option("-m", "--maxtick", type="int", + help="Set the maximum number of ticks to run for") (options, args) = parser.parse_args() @@ -24,27 +37,64 @@ this_dir = os.path.dirname(__file__) process = LiveProcess() process.executable = os.path.join(this_dir, options.cmd) -process.cmd = options.cmd +process.cmd = options.cmd + " " + options.options +if options.input != "": + process.input = options.input magicbus = Bus() mem = PhysicalMemory() +if options.timing and options.detailed: + print "Error: you may only specify one cpu model"; + sys.exit(1) + if options.timing: cpu = TimingSimpleCPU() +elif options.detailed: + #check for SMT workload + workloads = options.cmd.split(';') + if len(workloads) > 1: + process = [] + smt_idx = 0 + inputs = [] + + if options.input != "": + inputs = options.input.split(';') + + for wrkld in workloads: + smt_process = LiveProcess() + smt_process.executable = os.path.join(this_dir, wrkld) + smt_process.cmd = wrkld + " " + options.options + if inputs and inputs[smt_idx]: + smt_process.input = inputs[smt_idx] + process += [smt_process, ] + smt_idx += 1 + + cpu = DetailedO3CPU() else: cpu = AtomicSimpleCPU() cpu.workload = process cpu.mem = magicbus +cpu.icache_port=magicbus.port +cpu.dcache_port=magicbus.port system = System(physmem = mem, cpu = cpu) -system.c1 = Connector(side_a = mem, side_b = magicbus) + +if options.timing or options.detailed: + system.mem_mode = 'timing' + + +mem.port = magicbus.port root = Root(system = system) # instantiate configuration m5.instantiate(root) # simulate until program terminates -exit_event = m5.simulate() +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate() -print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() diff --git a/src/SConscript b/src/SConscript index a1c18711c..9825cafe7 100644 --- a/src/SConscript +++ b/src/SConscript @@ -62,7 +62,6 @@ base_sources = Split(''' base/range.cc base/random.cc base/sat_counter.cc - base/serializer.cc base/socket.cc base/statistics.cc base/str.cc @@ -85,33 +84,54 @@ base_sources = Split(''' cpu/base.cc cpu/cpuevent.cc cpu/exetrace.cc + cpu/func_unit.cc cpu/op_class.cc cpu/pc_event.cc cpu/quiesce_event.cc cpu/static_inst.cc - cpu/sampler/sampler.cc cpu/simple_thread.cc cpu/thread_state.cc - encumbered/cpu/full/fu_pool.cc - mem/bridge.cc mem/bus.cc - mem/connector.cc mem/mem_object.cc mem/packet.cc mem/physical.cc mem/port.cc + mem/cache/base_cache.cc + mem/cache/cache.cc + mem/cache/coherence/coherence_protocol.cc + mem/cache/coherence/uni_coherence.cc + mem/cache/miss/blocking_buffer.cc + mem/cache/miss/miss_queue.cc + mem/cache/miss/mshr.cc + mem/cache/miss/mshr_queue.cc + mem/cache/prefetch/base_prefetcher.cc + mem/cache/prefetch/ghb_prefetcher.cc + mem/cache/prefetch/prefetcher.cc + mem/cache/prefetch/stride_prefetcher.cc + mem/cache/prefetch/tagged_prefetcher.cc + mem/cache/tags/base_tags.cc + mem/cache/tags/cache_tags.cc + mem/cache/tags/fa_lru.cc + mem/cache/tags/iic.cc + mem/cache/tags/lru.cc + mem/cache/tags/repl/gen.cc + mem/cache/tags/repl/repl.cc + mem/cache/tags/split.cc + mem/cache/tags/split_lifo.cc + mem/cache/tags/split_lru.cc + + mem/cache/cache_builder.cc + sim/builder.cc - sim/configfile.cc sim/debug.cc sim/eventq.cc sim/faults.cc sim/main.cc - python/swig/main_wrap.cc + python/swig/cc_main_wrap.cc sim/param.cc - sim/profile.cc sim/root.cc sim/serialize.cc sim/sim_events.cc diff --git a/src/arch/SConscript b/src/arch/SConscript index c90694a68..bc517341a 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -28,7 +28,7 @@ # # Authors: Steve Reinhardt -import os.path +import os.path, sys # Import build environment variable from SConstruct. Import('env') @@ -128,13 +128,19 @@ isa_desc_gen_files = Split('decoder.cc decoder.hh') isa_desc_gen_files += [CpuModel.dict[cpu].filename for cpu in env['CPU_MODELS']] +# Also include the CheckerCPU as one of the models if it is being +# enabled via command line. +if env['USE_CHECKER']: + isa_desc_gen_files += [CpuModel.dict['CheckerCPU'].filename] + # The emitter patches up the sources & targets to include the # autogenerated files as targets and isa parser itself as a source. def isa_desc_emitter(target, source, env): return (isa_desc_gen_files, [isa_parser, cpu_models_file] + source) # Pieces are in place, so create the builder. -isa_desc_builder = Builder(action='python2.4 $SOURCES $TARGET.dir $CPU_MODELS', +python = sys.executable # use same Python binary used to run scons +isa_desc_builder = Builder(action=python + ' $SOURCES $TARGET.dir $CPU_MODELS', emitter = isa_desc_emitter) env.Append(BUILDERS = { 'ISADesc' : isa_desc_builder }) diff --git a/src/arch/alpha/ev5.cc b/src/arch/alpha/ev5.cc index 7d6894733..ae3b668ea 100644 --- a/src/arch/alpha/ev5.cc +++ b/src/arch/alpha/ev5.cc @@ -59,8 +59,12 @@ AlphaISA::initCPU(ThreadContext *tc, int cpuId) tc->setIntReg(16, cpuId); tc->setIntReg(0, cpuId); - tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + (new ResetFault)->vect()); + AlphaFault *reset = new ResetFault; + + tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + reset->vect()); tc->setNextPC(tc->readPC() + sizeof(MachInst)); + + delete reset; } //////////////////////////////////////////////////////////////////////// diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc index 8493223ff..eef4361fd 100644 --- a/src/arch/alpha/faults.cc +++ b/src/arch/alpha/faults.cc @@ -35,6 +35,9 @@ #include "base/trace.hh" #if FULL_SYSTEM #include "arch/alpha/ev5.hh" +#else +#include "sim/process.hh" +#include "mem/page_table.hh" #endif namespace AlphaISA @@ -56,6 +59,12 @@ FaultName ArithmeticFault::_name = "arith"; FaultVect ArithmeticFault::_vect = 0x0501; FaultStat ArithmeticFault::_count; +#if !FULL_SYSTEM +FaultName PageTableFault::_name = "page_table_fault"; +FaultVect PageTableFault::_vect = 0x0000; +FaultStat PageTableFault::_count; +#endif + FaultName InterruptFault::_name = "interrupt"; FaultVect InterruptFault::_vect = 0x0101; FaultStat InterruptFault::_count; @@ -173,6 +182,30 @@ void ItbFault::invoke(ThreadContext * tc) AlphaFault::invoke(tc); } +#else //!FULL_SYSTEM + +void PageTableFault::invoke(ThreadContext *tc) +{ + Process *p = tc->getProcessPtr(); + + // address is higher than the stack region or in the current stack region + if (vaddr > p->stack_base || vaddr > p->stack_min) + FaultBase::invoke(tc); + + // We've accessed the next page + if (vaddr > p->stack_min - PageBytes) { + warn("Increasing stack %#x:%#x to %#x:%#x because of access to %#x", + p->stack_min, p->stack_base, p->stack_min - PageBytes, + p->stack_base, vaddr); + p->stack_min -= PageBytes; + if (p->stack_base - p->stack_min > 8*1024*1024) + fatal("Over max stack size for one thread\n"); + p->pTable->allocate(p->stack_min, PageBytes); + } else { + FaultBase::invoke(tc); + } +} + #endif } // namespace AlphaISA diff --git a/src/arch/alpha/faults.hh b/src/arch/alpha/faults.hh index f952cf9d6..11a568174 100644 --- a/src/arch/alpha/faults.hh +++ b/src/arch/alpha/faults.hh @@ -81,6 +81,29 @@ class AlignmentFault : public AlphaFault bool isAlignmentFault() {return true;} }; +#if !FULL_SYSTEM +class PageTableFault : public AlphaFault +{ + private: + Addr vaddr; + static FaultName _name; + static FaultVect _vect; + static FaultStat _count; + public: + PageTableFault(Addr va) + : vaddr(va) {} + FaultName name() {return _name;} + FaultVect vect() {return _vect;} + FaultStat & countStat() {return _count;} + void invoke(ThreadContext * tc); +}; + +static inline Fault genPageTableFault(Addr va) +{ + return new PageTableFault(va); +} +#endif + static inline Fault genMachineCheckFault() { return new MachineCheckFault; diff --git a/src/arch/alpha/freebsd/system.cc b/src/arch/alpha/freebsd/system.cc index 7cf68e0db..8d50e1612 100644 --- a/src/arch/alpha/freebsd/system.cc +++ b/src/arch/alpha/freebsd/system.cc @@ -97,6 +97,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<string> kernel; Param<string> console; @@ -115,6 +116,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -133,6 +136,7 @@ CREATE_SIM_OBJECT(FreebsdAlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index fbdb119b6..f449d2d69 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -47,9 +47,11 @@ decode OPCODE default Unknown::unknown() { 0x23: ldt({{ Fa = Mem.df; }}); 0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LOCKED); 0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LOCKED); +#ifdef USE_COPY 0x20: MiscPrefetch::copy_load({{ EA = Ra; }}, {{ fault = xc->copySrcTranslate(EA); }}, inst_flags = [IsMemRef, IsLoad, IsCopy]); +#endif } format LoadOrPrefetch { @@ -69,9 +71,11 @@ decode OPCODE default Unknown::unknown() { 0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }}); 0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }}); 0x27: stt({{ Mem.df = Fa; }}); +#ifdef USE_COPY 0x24: MiscPrefetch::copy_store({{ EA = Rb; }}, {{ fault = xc->copy(EA); }}, inst_flags = [IsMemRef, IsStore, IsCopy]); +#endif } format StoreCond { @@ -659,11 +663,11 @@ decode OPCODE default Unknown::unknown() { 0xe000: rc({{ Ra = xc->readIntrFlag(); xc->setIntrFlag(0); - }}, IsNonSpeculative); + }}, IsNonSpeculative, IsUnverifiable); 0xf000: rs({{ Ra = xc->readIntrFlag(); xc->setIntrFlag(1); - }}, IsNonSpeculative); + }}, IsNonSpeculative, IsUnverifiable); } #else format FailUnimpl { @@ -701,7 +705,7 @@ decode OPCODE default Unknown::unknown() { }}, IsNonSpeculative); 0x83: callsys({{ xc->syscall(R0); - }}, IsNonSpeculative); + }}, IsSerializeAfter, IsNonSpeculative); // Read uniq reg into ABI return value register (r0) 0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess); // Write uniq reg with value from ABI arg register (r16) diff --git a/src/arch/alpha/linux/system.cc b/src/arch/alpha/linux/system.cc index bb35f046d..ef4e18cb5 100644 --- a/src/arch/alpha/linux/system.cc +++ b/src/arch/alpha/linux/system.cc @@ -150,9 +150,6 @@ LinuxAlphaSystem::~LinuxAlphaSystem() delete debugPrintkEvent; delete idleStartEvent; delete printThreadEvent; - delete intStartEvent; - delete intEndEvent; - delete intEndEvent2; } @@ -194,6 +191,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<string> kernel; Param<string> console; @@ -212,6 +210,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -230,6 +230,7 @@ CREATE_SIM_OBJECT(LinuxAlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/regfile.hh b/src/arch/alpha/regfile.hh index 1025412cd..9ecad6f42 100644 --- a/src/arch/alpha/regfile.hh +++ b/src/arch/alpha/regfile.hh @@ -112,6 +112,10 @@ namespace AlphaISA lock_flag = 0; lock_addr = 0; } + + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string §ion); #if FULL_SYSTEM protected: typedef uint64_t InternalProcReg; diff --git a/src/arch/alpha/system.cc b/src/arch/alpha/system.cc index 3aaba7d58..a7e615531 100644 --- a/src/arch/alpha/system.cc +++ b/src/arch/alpha/system.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Nathan Binkert */ #include "arch/alpha/ev5.hh" @@ -220,6 +221,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<std::string> kernel; Param<std::string> console; @@ -238,6 +240,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -256,6 +260,7 @@ CREATE_SIM_OBJECT(AlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/system.hh b/src/arch/alpha/system.hh index b26a5e301..0f4f64581 100644 --- a/src/arch/alpha/system.hh +++ b/src/arch/alpha/system.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Nathan Binkert */ #ifndef __ARCH_ALPHA_SYSTEM_HH__ diff --git a/src/arch/alpha/tru64/system.cc b/src/arch/alpha/tru64/system.cc index 6c0edc1ee..3ef1e4d3c 100644 --- a/src/arch/alpha/tru64/system.cc +++ b/src/arch/alpha/tru64/system.cc @@ -95,6 +95,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<string> kernel; Param<string> console; @@ -113,6 +114,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem) INIT_PARAM(boot_cpu_frequency, "frequency of the boot cpu"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -131,6 +134,7 @@ CREATE_SIM_OBJECT(Tru64AlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/mips/SConscript b/src/arch/mips/SConscript index 6295a6c11..8353bcde7 100644 --- a/src/arch/mips/SConscript +++ b/src/arch/mips/SConscript @@ -52,8 +52,7 @@ base_sources = Split(''' # Full-system sources full_system_sources = Split(''' - memory.cc - mips34k.cc + #Insert Full-System Files Here ''') # Syscall emulation (non-full-system) sources diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc index 810c3fed4..cfeb045eb 100644 --- a/src/arch/mips/faults.cc +++ b/src/arch/mips/faults.cc @@ -32,6 +32,10 @@ #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "base/trace.hh" +#if !FULL_SYSTEM +#include "sim/process.hh" +#include "mem/page_table.hh" +#endif namespace MipsISA { @@ -52,6 +56,12 @@ FaultName ArithmeticFault::_name = "arith"; FaultVect ArithmeticFault::_vect = 0x0501; FaultStat ArithmeticFault::_count; +#if !FULL_SYSTEM +FaultName PageTableFault::_name = "page_table_fault"; +FaultVect PageTableFault::_vect = 0x0000; +FaultStat PageTableFault::_count; +#endif + FaultName InterruptFault::_name = "interrupt"; FaultVect InterruptFault::_vect = 0x0101; FaultStat InterruptFault::_count; @@ -127,7 +137,28 @@ void ArithmeticFault::invoke(ThreadContext * tc) panic("Arithmetic traps are unimplemented!"); } -#endif +#else //!FULL_SYSTEM +void PageTableFault::invoke(ThreadContext *tc) +{ + Process *p = tc->getProcessPtr(); + + // address is higher than the stack region or in the current stack region + if (vaddr > p->stack_base || vaddr > p->stack_min) + FaultBase::invoke(tc); + + // We've accessed the next page + if (vaddr > p->stack_min - PageBytes) { + p->stack_min -= PageBytes; + if (p->stack_base - p->stack_min > 8*1024*1024) + fatal("Over max stack size for one thread\n"); + p->pTable->allocate(p->stack_min, PageBytes); + warn("Increasing stack size by one page."); + } else { + FaultBase::invoke(tc); + } +} + +#endif } // namespace MipsISA diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh index d8bf59cc1..95c61cfbc 100644 --- a/src/arch/mips/faults.hh +++ b/src/arch/mips/faults.hh @@ -79,6 +79,30 @@ class AlignmentFault : public MipsFault bool isAlignmentFault() {return true;} }; +#if !FULL_SYSTEM +class PageTableFault : public MipsFault +{ + private: + Addr vaddr; + static FaultName _name; + static FaultVect _vect; + static FaultStat _count; + public: + PageTableFault(Addr va) + : vaddr(va) {} + FaultName name() {return _name;} + FaultVect vect() {return _vect;} + FaultStat & countStat() {return _count;} + void invoke(ThreadContext * tc); +}; + +static inline Fault genPageTableFault(Addr va) +{ + return new PageTableFault(va); +} +#endif + + static inline Fault genMachineCheckFault() { return new MachineCheckFault; diff --git a/src/arch/mips/isa/base.isa b/src/arch/mips/isa/base.isa index b733da7da..f07b06e03 100644 --- a/src/arch/mips/isa/base.isa +++ b/src/arch/mips/isa/base.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -38,7 +38,6 @@ output header {{ using namespace MipsISA; - /** * Base class for all MIPS static instructions. */ diff --git a/src/arch/mips/isa/bitfields.isa b/src/arch/mips/isa/bitfields.isa index e8d4578c7..35815bf1f 100644 --- a/src/arch/mips/isa/bitfields.isa +++ b/src/arch/mips/isa/bitfields.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index a64f74c4f..9ac982e34 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -158,14 +158,16 @@ decode OPCODE_HI default Unknown::unknown() { } format HiLoMiscOp { - 0x2: div({{ - HI = Rs.sd % Rt.sd; - LO = Rs.sd / Rt.sd; - }}); - 0x3: divu({{ - HI = Rs.ud % Rt.ud; - LO = Rs.ud / Rt.ud; - }}); + 0x2: div({{ if (Rt.sd != 0) { + HI = Rs.sd % Rt.sd; + LO = Rs.sd / Rt.sd; + } + }}); + 0x3: divu({{ if (Rt.ud != 0) { + HI = Rs.ud % Rt.ud; + LO = Rs.ud / Rt.ud; + } + }}); } } @@ -333,7 +335,7 @@ decode OPCODE_HI default Unknown::unknown() { 0x0: decode RS_HI { 0x0: decode RS_LO { format CP1Control { - 0x0: mfc1 ({{ Rt.uw = Fs.uw<31:0>; }}); + 0x0: mfc1 ({{ Rt.uw = Fs.uw; }}); 0x2: cfc1({{ switch (FS) @@ -438,9 +440,10 @@ decode OPCODE_HI default Unknown::unknown() { 0x3: div_s({{ Fd.sf = Fs.sf / Ft.sf;}}); 0x4: sqrt_s({{ Fd.sf = sqrt(Fs.sf);}}); 0x5: abs_s({{ Fd.sf = fabs(Fs.sf);}}); - 0x6: mov_s({{ Fd.sf = Fs.sf;}}); 0x7: neg_s({{ Fd.sf = -Fs.sf;}}); } + + 0x6: BasicOp::mov_s({{ Fd.sf = Fs.sf;}}); } 0x1: decode FUNCTION_LO { @@ -549,9 +552,10 @@ decode OPCODE_HI default Unknown::unknown() { 0x3: div_d({{ Fd.df = Fs.df / Ft.df; }}); 0x4: sqrt_d({{ Fd.df = sqrt(Fs.df); }}); 0x5: abs_d({{ Fd.df = fabs(Fs.df); }}); - 0x6: mov_d({{ Fd.df = Fs.df; }}); 0x7: neg_d({{ Fd.df = -1 * Fs.df; }}); } + + 0x6: BasicOp::mov_d({{ Fd.df = Fs.df; }}); } 0x1: decode FUNCTION_LO { @@ -853,17 +857,19 @@ decode OPCODE_HI default Unknown::unknown() { 0x3: decode FUNCTION_HI { 0x0: decode FUNCTION_LO { format LoadIndexedMemory { - 0x0: lwxc1({{ Ft.uw = Mem.uw;}}); - 0x1: ldxc1({{ Ft.ud = Mem.ud;}}); - 0x5: luxc1({{ Ft.uw = Mem.ud;}}); + 0x0: lwxc1({{ Fd.uw = Mem.uw;}}); + 0x1: ldxc1({{ Fd.ud = Mem.ud;}}); + 0x5: luxc1({{ Fd.ud = Mem.ud;}}, + {{ EA = (Rs + Rt) & ~7; }}); } } 0x1: decode FUNCTION_LO { format StoreIndexedMemory { - 0x0: swxc1({{ Mem.uw = Ft.uw;}}); - 0x1: sdxc1({{ Mem.ud = Ft.ud;}}); - 0x5: suxc1({{ Mem.ud = Ft.ud;}}); + 0x0: swxc1({{ Mem.uw = Fs.uw;}}); + 0x1: sdxc1({{ Mem.ud = Fs.ud;}}); + 0x5: suxc1({{ Mem.ud = Fs.ud;}}, + {{ EA = (Rs + Rt) & ~7; }}); } 0x7: Prefetch::prefx({{ EA = Rs + Rt; }}); @@ -991,7 +997,7 @@ decode OPCODE_HI default Unknown::unknown() { 0x7: decode FUNCTION_HI { 0x0: decode FUNCTION_LO { format BasicOp { - 0x1: ext({{ Rt.uw = bits(Rs.uw, MSB+LSB, LSB); }}); + 0x0: ext({{ Rt.uw = bits(Rs.uw, MSB+LSB, LSB); }}); 0x4: ins({{ Rt.uw = bits(Rt.uw, 31, MSB+1) << (MSB+1) | bits(Rs.uw, MSB-LSB, 0) << LSB | bits(Rt.uw, LSB-1, 0); @@ -1014,8 +1020,8 @@ decode OPCODE_HI default Unknown::unknown() { Rt.uw<7:0> << 8 | Rt.uw<15:8>; }}); - 0x10: seb({{ Rd.sw = Rt.sw<7:0>}}); - 0x18: seh({{ Rd.sw = Rt.sw<15:0>}}); + 0x10: seb({{ Rd.sw = Rt.sb; }}); + 0x18: seh({{ Rd.sw = Rt.sh; }}); } } diff --git a/src/arch/mips/isa/formats/basic.isa b/src/arch/mips/isa/formats/basic.isa index 35ce09205..29dafd541 100644 --- a/src/arch/mips/isa/formats/basic.isa +++ b/src/arch/mips/isa/formats/basic.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa index 827e3ccf0..5230ce9cc 100644 --- a/src/arch/mips/isa/formats/branch.isa +++ b/src/arch/mips/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/control.isa b/src/arch/mips/isa/formats/control.isa index 509ee7e87..6c7d396f3 100644 --- a/src/arch/mips/isa/formats/control.isa +++ b/src/arch/mips/isa/formats/control.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/fp.isa b/src/arch/mips/isa/formats/fp.isa index d05b04d0e..cdb892b3f 100644 --- a/src/arch/mips/isa/formats/fp.isa +++ b/src/arch/mips/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -142,10 +142,10 @@ output exec {{ cpu->setFloatRegBits(inst, 0, mips_nan, size); //Read FCSR from FloatRegFile - uint32_t fcsr_bits = cpu->tc->readFloatRegBits(FCSR); + uint32_t fcsr_bits = cpu->tcBase()->readFloatRegBits(FCSR); //Write FCSR from FloatRegFile - cpu->tc->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits)); + cpu->tcBase()->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits)); if (traceData) { traceData->setData(mips_nan); } return true; @@ -158,12 +158,12 @@ output exec {{ fpResetCauseBits(%(CPU_exec_context)s *cpu) { //Read FCSR from FloatRegFile - uint32_t fcsr = cpu->tc->readFloatRegBits(FCSR); + uint32_t fcsr = cpu->tcBase()->readFloatRegBits(FCSR); fcsr = bits(fcsr, 31, 18) << 18 | bits(fcsr, 11, 0); //Write FCSR from FloatRegFile - cpu->tc->setFloatRegBits(FCSR, fcsr); + cpu->tcBase()->setFloatRegBits(FCSR, fcsr); } }}; @@ -176,8 +176,9 @@ def template FloatingPointExecute {{ //When is the right time to reset cause bits? //start of every instruction or every cycle? +#if FULL_SYSTEM fpResetCauseBits(xc); - +#endif %(op_decl)s; %(op_rd)s; @@ -192,7 +193,10 @@ def template FloatingPointExecute {{ //---- //Check for IEEE 754 FP Exceptions //fault = fpNanOperands((FPOp*)this, xc, Fd, traceData); - if (!fpInvalidOp((FPOp*)this, xc, Fd, traceData) && + if ( +#if FULL_SYSTEM + !fpInvalidOp((FPOp*)this, xc, Fd, traceData) && +#endif fault == NoFault) { %(op_wb)s; diff --git a/src/arch/mips/isa/formats/int.isa b/src/arch/mips/isa/formats/int.isa index 7b5affb5c..56a4ec204 100644 --- a/src/arch/mips/isa/formats/int.isa +++ b/src/arch/mips/isa/formats/int.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -228,7 +228,7 @@ def format IntOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'IntOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = RegNopCheckDecode.subst(iop) exec_output = BasicExecute.subst(iop) }}; @@ -236,7 +236,7 @@ def format IntImmOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'IntImmOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = ImmNopCheckDecode.subst(iop) exec_output = BasicExecute.subst(iop) }}; @@ -252,7 +252,7 @@ def format HiLoOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'HiLoOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = BasicDecode.subst(iop) exec_output = HiLoExecute.subst(iop) }}; @@ -260,7 +260,7 @@ def format HiLoMiscOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'HiLoMiscOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = BasicDecode.subst(iop) exec_output = HiLoExecute.subst(iop) }}; diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index f52247056..f03f7becd 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2005 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -58,14 +58,8 @@ output header {{ StaticInstPtr _memAccPtr = nullStaticInstPtr) : MipsStaticInst(mnem, _machInst, __opClass), memAccessFlags(0), eaCompPtr(_eaCompPtr), memAccPtr(_memAccPtr), - disp(OFFSET) + disp(sext<16>(OFFSET)) { - //If Bit 15 is 1 then Sign Extend - int32_t temp = disp & 0x00008000; - - if (temp > 0) { - disp |= 0xFFFF0000; - } } std::string @@ -77,6 +71,24 @@ output header {{ const StaticInstPtr &memAccInst() const { return memAccPtr; } }; + /** + * Base class for a few miscellaneous memory-format insts + * that don't interpret the disp field + */ + class MemoryNoDisp : public Memory + { + protected: + /// Constructor + MemoryNoDisp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + StaticInstPtr _eaCompPtr = nullStaticInstPtr, + StaticInstPtr _memAccPtr = nullStaticInstPtr) + : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr) + { + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; }}; @@ -84,10 +96,18 @@ output decoder {{ std::string Memory::generateDisassembly(Addr pc, const SymbolTable *symtab) const { - return csprintf("%-10s %c%d,%d(r%d)", mnemonic, + return csprintf("%-10s %c%d, %d(r%d)", mnemonic, flags[IsFloating] ? 'f' : 'r', RT, disp, RS); } + std::string + MemoryNoDisp::generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + return csprintf("%-10s %c%d, r%d(r%d)", mnemonic, + flags[IsFloating] ? 'f' : 'r', + flags[IsFloating] ? FD : RD, + RS, RT); + } }}; def template LoadStoreDeclare {{ @@ -479,23 +499,11 @@ def template MiscCompleteAcc {{ } }}; -// load instructions use Rt as dest, so check for -// Rt == 0 to detect nops -def template LoadNopCheckDecode {{ - { - MipsStaticInst *i = new %(class_name)s(machInst); - if (RT == 0) { - i = makeNop(i); - } - return i; - } -}}; - def format LoadMemory(memacc_code, ea_code = {{ EA = Rs + disp; }}, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, + decode_template = ImmNopCheckDecode, exec_template_base = 'Load') }}; @@ -510,7 +518,7 @@ def format LoadIndexedMemory(memacc_code, ea_code = {{ EA = Rs + Rt; }}, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, + decode_template = ImmNopCheckDecode, exec_template_base = 'Load') }}; @@ -534,7 +542,7 @@ def format LoadUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3; (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, + decode_template = ImmNopCheckDecode, exec_template_base = 'Load') }}; @@ -551,7 +559,6 @@ def format StoreUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3; (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, exec_template_base = 'Store') }}; diff --git a/src/arch/mips/isa/formats/mt.isa b/src/arch/mips/isa/formats/mt.isa index 521b01123..96435f8c9 100644 --- a/src/arch/mips/isa/formats/mt.isa +++ b/src/arch/mips/isa/formats/mt.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,14 +35,15 @@ output header {{ /** - * Base class for integer operations. + * Base class for MIPS MT ASE operations. */ class MT : public MipsStaticInst { protected: /// Constructor - MT(const char *mnem, MachInst _machInst, OpClass __opClass) : MipsStaticInst(mnem, _machInst, __opClass) + MT(const char *mnem, MachInst _machInst, OpClass __opClass) : + MipsStaticInst(mnem, _machInst, __opClass) { } diff --git a/src/arch/mips/isa/formats/noop.isa b/src/arch/mips/isa/formats/noop.isa index 4fd8235e4..7f3d313ad 100644 --- a/src/arch/mips/isa/formats/noop.isa +++ b/src/arch/mips/isa/formats/noop.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -73,7 +73,8 @@ output decoder {{ MipsStaticInst * makeNop(MipsStaticInst *inst) { - MipsStaticInst *nop = new Nop(inst->disassemble(0), inst->machInst); + std::string nop_str = "(" + inst->disassemble(0) + ")"; + MipsStaticInst *nop = new Nop(nop_str, inst->machInst); delete inst; return nop; } @@ -87,16 +88,36 @@ output exec {{ } }}; -// integer & FP operate instructions use RT as dest, so check for -// RT == 0 to detect nops -def template OperateNopCheckDecode {{ +// Int & FP operate instructions use RD as dest, so check for +// RD == 0 to detect nops +def template RegNopCheckDecode {{ { MipsStaticInst *i = new %(class_name)s(machInst); + //if (RD == 0) { + //i = makeNop(i); + //} + return i; + } +}}; +def template OperateNopCheckDecode {{ + { + MipsStaticInst *i = new %(class_name)s(machInst); //if (RD == 0) { - // i = makeNop(i); + // i = makeNop(i); //} + return i; + } +}}; +// IntImm & Memory instructions use Rt as dest, so check for +// Rt == 0 to detect nops +def template ImmNopCheckDecode {{ + { + MipsStaticInst *i = new %(class_name)s(machInst); + //if (RT == 0) { + // i = makeNop(i); + // } return i; } }}; diff --git a/src/arch/mips/isa/formats/tlbop.isa b/src/arch/mips/isa/formats/tlbop.isa index 75ab71c48..b974ccbed 100644 --- a/src/arch/mips/isa/formats/tlbop.isa +++ b/src/arch/mips/isa/formats/tlbop.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/trap.isa b/src/arch/mips/isa/formats/trap.isa index 574b808cc..b9066f374 100644 --- a/src/arch/mips/isa/formats/trap.isa +++ b/src/arch/mips/isa/formats/trap.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -70,7 +70,7 @@ def template TrapExecute {{ }}; def format Trap(code, *flags) {{ - code = 'panic(\"' + code = 'warn(\"' code += 'Trap Exception Handler Is Currently Not Implemented.' code += '\");' iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags) diff --git a/src/arch/mips/isa/formats/unimp.isa b/src/arch/mips/isa/formats/unimp.isa index e17b5f832..03068fa74 100644 --- a/src/arch/mips/isa/formats/unimp.isa +++ b/src/arch/mips/isa/formats/unimp.isa @@ -1,7 +1,7 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2005 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/unknown.isa b/src/arch/mips/isa/formats/unknown.isa index 41387adca..70b3901e9 100644 --- a/src/arch/mips/isa/formats/unknown.isa +++ b/src/arch/mips/isa/formats/unknown.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index b67a02d07..0cc375af3 100644 --- a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/includes.isa b/src/arch/mips/isa/includes.isa index 555cec255..6b5f3c588 100644 --- a/src/arch/mips/isa/includes.isa +++ b/src/arch/mips/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/main.isa b/src/arch/mips/isa/main.isa index 9da3fc0db..2d7c63cd5 100644 --- a/src/arch/mips/isa/main.isa +++ b/src/arch/mips/isa/main.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2005 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/operands.isa b/src/arch/mips/isa/operands.isa index 316552ef4..3843dc053 100644 --- a/src/arch/mips/isa/operands.isa +++ b/src/arch/mips/isa/operands.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -53,8 +53,8 @@ def operands {{ 'R2': ('IntReg', 'uw','2', 'IsInteger', 5), #Special Integer Reg operands - 'HI': ('IntReg', 'uw','32', 'IsInteger', 6), - 'LO': ('IntReg', 'uw','33', 'IsInteger', 7), + 'HI': ('IntReg', 'uw','MipsISA::HI', 'IsInteger', 6), + 'LO': ('IntReg', 'uw','MipsISA::LO', 'IsInteger', 7), #Immediate Value operand 'IntImm': ('IntReg', 'uw', 'INTIMM', 'IsInteger', 3), @@ -66,11 +66,11 @@ def operands {{ 'Fr': ('FloatReg', 'sf', 'FR', 'IsFloating', 3), #Special Floating Point Control Reg Operands - 'FIR': ('FloatReg', 'uw', '32', 'IsFloating', 1), - 'FCCR': ('FloatReg', 'uw', '33', 'IsFloating', 2), - 'FEXR': ('FloatReg', 'uw', '34', 'IsFloating', 3), - 'FENR': ('FloatReg', 'uw', '35', 'IsFloating', 3), - 'FCSR': ('FloatReg', 'uw', '36', 'IsFloating', 3), + 'FIR': ('FloatReg', 'uw', 'MipsISA::FIR', 'IsFloating', 1), + 'FCCR': ('FloatReg', 'uw', 'MipsISA::FCCR', 'IsFloating', 2), + 'FEXR': ('FloatReg', 'uw', 'MipsISA::FEXR', 'IsFloating', 3), + 'FENR': ('FloatReg', 'uw', 'MipsISA::FENR', 'IsFloating', 3), + 'FCSR': ('FloatReg', 'uw', 'MipsISA::FCSR', 'IsFloating', 3), #Operands For Paired Singles FP Operations 'Fd1': ('FloatReg', 'sf', 'FD', 'IsFloating', 4), diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc index 9f3817a60..a8b41270e 100644 --- a/src/arch/mips/isa_traits.cc +++ b/src/arch/mips/isa_traits.cc @@ -30,7 +30,7 @@ */ #include "arch/mips/isa_traits.hh" -#include "config/full_system.hh" +//#include "config/full_system.hh" #include "cpu/static_inst.hh" #include "sim/serialize.hh" #include "base/bitfield.hh" @@ -43,39 +43,20 @@ void MipsISA::copyRegs(ThreadContext *src, ThreadContext *dest) { panic("Copy Regs Not Implemented Yet\n"); - /*fpcr = xc->readMiscReg(MipsISA::Fpcr_DepTag); - uniq = xc->readMiscReg(MipsISA::Uniq_DepTag); - lock_flag = xc->readMiscReg(MipsISA::Lock_Flag_DepTag); - lock_addr = xc->readMiscReg(MipsISA::Lock_Addr_DepTag); +} -#if FULL_SYSTEM - copyIprs(xc); - #endif*/ +void +MipsISA::copyMiscRegs(ThreadContext *src, ThreadContext *dest) +{ + panic("Copy Misc. Regs Not Implemented Yet\n"); } void MipsISA::MiscRegFile::copyMiscRegs(ThreadContext *tc) { panic("Copy Misc. Regs Not Implemented Yet\n"); - /*fpcr = xc->readMiscReg(MipsISA::Fpcr_DepTag); - uniq = xc->readMiscReg(MipsISA::Uniq_DepTag); - lock_flag = xc->readMiscReg(MipsISA::Lock_Flag_DepTag); - lock_addr = xc->readMiscReg(MipsISA::Lock_Addr_DepTag); - - #endif*/ } -#if FULL_SYSTEM - -static inline Addr -TruncPage(Addr addr) -{ return addr & ~(MipsISA::PageBytes - 1); } - -static inline Addr -RoundPage(Addr addr) -{ return (addr + MipsISA::PageBytes - 1) & ~(MipsISA::PageBytes - 1); } -#endif - void IntRegFile::serialize(std::ostream &os) { @@ -100,12 +81,6 @@ RegFile::serialize(std::ostream &os) SERIALIZE_SCALAR(pc); SERIALIZE_SCALAR(npc); SERIALIZE_SCALAR(nnpc); -#if FULL_SYSTEM - SERIALIZE_ARRAY(palregs, NumIntRegs); - SERIALIZE_ARRAY(ipr, NumInternalProcRegs); - SERIALIZE_SCALAR(intrflag); - SERIALIZE_SCALAR(pal_shadow); -#endif } @@ -121,43 +96,5 @@ RegFile::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(pc); UNSERIALIZE_SCALAR(npc); UNSERIALIZE_SCALAR(nnpc); -#if FULL_SYSTEM - UNSERIALIZE_ARRAY(palregs, NumIntRegs); - UNSERIALIZE_ARRAY(ipr, NumInternalProcRegs); - UNSERIALIZE_SCALAR(intrflag); - UNSERIALIZE_SCALAR(pal_shadow); -#endif -} - -#if FULL_SYSTEM -void -PTE::serialize(std::ostream &os) -{ - SERIALIZE_SCALAR(tag); - SERIALIZE_SCALAR(ppn); - SERIALIZE_SCALAR(xre); - SERIALIZE_SCALAR(xwe); - SERIALIZE_SCALAR(asn); - SERIALIZE_SCALAR(asma); - SERIALIZE_SCALAR(fonr); - SERIALIZE_SCALAR(fonw); - SERIALIZE_SCALAR(valid); } - - -void -PTE::unserialize(Checkpoint *cp, const std::string §ion) -{ - UNSERIALIZE_SCALAR(tag); - UNSERIALIZE_SCALAR(ppn); - UNSERIALIZE_SCALAR(xre); - UNSERIALIZE_SCALAR(xwe); - UNSERIALIZE_SCALAR(asn); - UNSERIALIZE_SCALAR(asma); - UNSERIALIZE_SCALAR(fonr); - UNSERIALIZE_SCALAR(fonw); - UNSERIALIZE_SCALAR(valid); -} - -#endif //FULL_SYSTEM diff --git a/src/arch/mips/isa_traits.hh b/src/arch/mips/isa_traits.hh index dc8b6758a..2f485c7fd 100644 --- a/src/arch/mips/isa_traits.hh +++ b/src/arch/mips/isa_traits.hh @@ -57,12 +57,6 @@ namespace LittleEndianGuest {}; class StaticInst; class StaticInstPtr; -namespace MIPS34K { -int DTB_ASN_ASN(uint64_t reg); -int ITB_ASN_ASN(uint64_t reg); -}; - -#if !FULL_SYSTEM class SyscallReturn { public: template <class T> @@ -95,7 +89,6 @@ class SyscallReturn { uint64_t retval; bool success; }; -#endif namespace MipsISA { @@ -136,16 +129,10 @@ namespace MipsISA template <class TC> void zeroRegisters(TC *tc); - const Addr MaxAddr = (Addr)-1; +// const Addr MaxAddr = (Addr)-1; void copyRegs(ThreadContext *src, ThreadContext *dest); - uint64_t fpConvert(double fp_val, ConvertType cvt_type); - double roundFP(double val, int digits); - double truncFP(double val); - bool getFPConditionCode(uint32_t fcsr_reg, int cc); - uint32_t makeCCVector(uint32_t fcsr, int num, bool val); - // Machine operations void saveMachineReg(AnyReg &savereg, const RegFile ®_file, @@ -191,12 +178,6 @@ namespace MipsISA }; -#if FULL_SYSTEM - -#include "arch/mips/mips34k.hh" - -#endif - using namespace MipsISA; #endif // __ARCH_MIPS_ISA_TRAITS_HH__ diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index 7762c2fa0..cb847fe04 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -1,3 +1,4 @@ + /* * Copyright (c) 2003-2004 The Regents of The University of Michigan * All rights reserved. @@ -40,6 +41,8 @@ using namespace std; using namespace MipsISA; +Addr MipsLiveProcess::stack_start = 0x7FFFFFFF; + MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, System *_system, int stdin_fd, int stdout_fd, int stderr_fd, std::vector<std::string> &argv, std::vector<std::string> &envp) @@ -48,10 +51,11 @@ MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, { // Set up stack. On MIPS, stack starts at the top of kuseg // user address space. MIPS stack grows down from here - stack_base = 0x7FFFFFFF; + stack_base = stack_start; // Set pointer for next thread stack. Reserve 8M for main stack. next_thread_stack_base = stack_base - (8 * 1024 * 1024); + stack_start = next_thread_stack_base; // Set up break point (Top of Heap) brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index b0ef20399..4baee134b 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -50,6 +50,9 @@ class MipsLiveProcess : public LiveProcess std::vector<std::string> &envp); void startup(); + + + static Addr stack_start; }; diff --git a/src/arch/mips/regfile/float_regfile.hh b/src/arch/mips/regfile/float_regfile.hh index d1a60298a..61efbb416 100644 --- a/src/arch/mips/regfile/float_regfile.hh +++ b/src/arch/mips/regfile/float_regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Korey Sewell */ #ifndef __ARCH_MIPS_FLOAT_REGFILE_HH__ @@ -34,13 +32,14 @@ #include "arch/mips/types.hh" #include "arch/mips/constants.hh" #include "base/misc.hh" +#include "base/bitfield.hh" #include "config/full_system.hh" #include "sim/byteswap.hh" #include "sim/faults.hh" #include "sim/host.hh" class Checkpoint; -class ThreadContext; +class ExecContext; class Regfile; namespace MipsISA @@ -101,8 +100,9 @@ namespace MipsISA } } - Fault setReg(int floatReg, const FloatReg &val, int width) + Fault setReg(int floatReg, const FloatRegVal &val, int width) { + using namespace std; switch(width) { case SingleWidth: @@ -117,8 +117,8 @@ namespace MipsISA { const void *double_ptr = &val; FloatReg64 temp_double = *(FloatReg64 *) double_ptr; - regs[floatReg + 1] = temp_double >> 32; - regs[floatReg] = 0x0000FFFF & temp_double; + regs[floatReg + 1] = bits(temp_double, 63, 32); + regs[floatReg] = bits(temp_double, 31, 0); break; } @@ -140,8 +140,8 @@ namespace MipsISA break; case DoubleWidth: - regs[floatReg + 1] = val >> 32; - regs[floatReg] = val; + regs[floatReg + 1] = bits(val, 63, 32); + regs[floatReg] = bits(val, 31, 0); break; default: diff --git a/src/arch/mips/regfile/int_regfile.hh b/src/arch/mips/regfile/int_regfile.hh index dc82a3c26..5add1b7be 100644 --- a/src/arch/mips/regfile/int_regfile.hh +++ b/src/arch/mips/regfile/int_regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -65,11 +65,6 @@ namespace MipsISA }; - enum MiscIntRegNums { - HI = NumIntArchRegs, - LO - }; - } // namespace MipsISA #endif diff --git a/src/arch/mips/regfile/misc_regfile.hh b/src/arch/mips/regfile/misc_regfile.hh index f8aeab8cb..87961f97e 100644 --- a/src/arch/mips/regfile/misc_regfile.hh +++ b/src/arch/mips/regfile/misc_regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,18 +45,12 @@ namespace MipsISA protected: uint64_t fpcr; // floating point condition codes - uint64_t uniq; // process-unique register bool lock_flag; // lock flag for LL/SC Addr lock_addr; // lock address for LL/SC MiscReg miscRegFile[NumMiscRegs]; public: - //These functions should be removed once the simplescalar cpu model - //has been replaced. - int getInstAsid(); - int getDataAsid(); - void copyMiscRegs(ThreadContext *tc); MiscReg readReg(int misc_reg) @@ -80,17 +74,6 @@ namespace MipsISA miscRegFile[misc_reg] = val; return NoFault; } -#if FULL_SYSTEM - void clearIprs() { } - - protected: - InternalProcReg ipr[NumInternalProcRegs]; // Internal processor regs - - private: - MiscReg readIpr(int idx, Fault &fault, ThreadContext *tc) { } - - Fault setIpr(int idx, uint64_t val, ThreadContext *tc) { } -#endif friend class RegFile; }; } // namespace MipsISA diff --git a/src/arch/mips/regfile/regfile.hh b/src/arch/mips/regfile/regfile.hh index af61e62cd..a68120299 100644 --- a/src/arch/mips/regfile/regfile.hh +++ b/src/arch/mips/regfile/regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -81,12 +81,12 @@ namespace MipsISA return miscRegFile.setRegWithEffect(miscReg, val, tc); } - FloatReg readFloatReg(int floatReg) + FloatRegVal readFloatReg(int floatReg) { return floatRegFile.readReg(floatReg,SingleWidth); } - FloatReg readFloatReg(int floatReg, int width) + FloatRegVal readFloatReg(int floatReg, int width) { return floatRegFile.readReg(floatReg,width); } @@ -101,12 +101,12 @@ namespace MipsISA return floatRegFile.readRegBits(floatReg,width); } - Fault setFloatReg(int floatReg, const FloatReg &val) + Fault setFloatReg(int floatReg, const FloatRegVal &val) { return floatRegFile.setReg(floatReg, val, SingleWidth); } - Fault setFloatReg(int floatReg, const FloatReg &val, int width) + Fault setFloatReg(int floatReg, const FloatRegVal &val, int width) { return floatRegFile.setReg(floatReg, val, width); } @@ -168,16 +168,6 @@ namespace MipsISA nnpc = val; } - -#if FULL_SYSTEM - IntReg palregs[NumIntRegs]; // PAL shadow registers - InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs - int intrflag; // interrupt flag - bool pal_shadow; // using pal_shadow registers - inline int instAsid() { return MIPS34K::ITB_ASN_ASN(ipr[IPR_ITB_ASN]); } - inline int dataAsid() { return MIPS34K::DTB_ASN_ASN(ipr[IPR_DTB_ASN]); } -#endif // FULL_SYSTEM - void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string §ion); @@ -193,9 +183,6 @@ namespace MipsISA void copyMiscRegs(ThreadContext *src, ThreadContext *dest); -#if FULL_SYSTEM - void copyIprs(ThreadContext *src, ThreadContext *dest); -#endif } // namespace MipsISA #endif diff --git a/src/arch/mips/stacktrace.hh b/src/arch/mips/stacktrace.hh index 38767cef7..f9e092dbd 100644 --- a/src/arch/mips/stacktrace.hh +++ b/src/arch/mips/stacktrace.hh @@ -25,11 +25,11 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Korey Sewell + * Authors: Ali Saidi */ -#ifndef __ARCH_ALPHA_STACKTRACE_HH__ -#define __ARCH_ALPHA_STACKTRACE_HH__ +#ifndef __ARCH_MIPS_STACKTRACE_HH__ +#define __ARCH_MIPS_STACKTRACE_HH__ #include "base/trace.hh" #include "cpu/static_inst.hh" @@ -118,4 +118,4 @@ StackTrace::trace(ThreadContext *tc, StaticInstPtr inst) return true; } -#endif // __ARCH_ALPHA_STACKTRACE_HH__ +#endif // __ARCH_MIPS_STACKTRACE_HH__ diff --git a/src/arch/mips/types.hh b/src/arch/mips/types.hh index 7cd2eed0c..6330044d9 100644 --- a/src/arch/mips/types.hh +++ b/src/arch/mips/types.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,14 +42,15 @@ namespace MipsISA typedef uint32_t IntReg; // floating point register file entry type - typedef double FloatReg; typedef uint32_t FloatReg32; typedef uint64_t FloatReg64; typedef uint64_t FloatRegBits; + typedef double FloatRegVal; + typedef double FloatReg; + // cop-0/cop-1 system control register typedef uint64_t MiscReg; - typedef uint64_t InternalProcReg; typedef union { IntReg intreg; diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc index 57b4d4d86..7b7765935 100644 --- a/src/arch/sparc/faults.cc +++ b/src/arch/sparc/faults.cc @@ -33,6 +33,10 @@ #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "base/trace.hh" +#if !FULL_SYSTEM +#include "sim/process.hh" +#include "mem/page_table.hh" +#endif namespace SparcISA { @@ -218,6 +222,13 @@ TrapType TrapInstruction::_baseTrapType = 0x100; FaultPriority TrapInstruction::_priority = 16; FaultStat TrapInstruction::_count; +#if !FULL_SYSTEM +FaultName PageTableFault::_name = "page_table_fault"; +TrapType PageTableFault::_trapType = 0x0000; +FaultPriority PageTableFault::_priority = 0; +FaultStat PageTableFault::_count; +#endif + #if FULL_SYSTEM void SparcFault::invoke(ThreadContext * tc) @@ -249,9 +260,28 @@ void SparcFault::invoke(ThreadContext * tc) void TrapInstruction::invoke(ThreadContext * tc) { - tc->syscall(syscall_num); + // Should be handled in ISA. } +void PageTableFault::invoke(ThreadContext *tc) +{ + Process *p = tc->getProcessPtr(); + + // address is higher than the stack region or in the current stack region + if (vaddr > p->stack_base || vaddr > p->stack_min) + FaultBase::invoke(tc); + + // We've accessed the next page + if (vaddr > p->stack_min - PageBytes) { + p->stack_min -= PageBytes; + if (p->stack_base - p->stack_min > 8*1024*1024) + fatal("Over max stack size for one thread\n"); + p->pTable->allocate(p->stack_min, PageBytes); + warn("Increasing stack size by one page."); + } else { + FaultBase::invoke(tc); + } +} #endif } // namespace SparcISA diff --git a/src/arch/sparc/faults.hh b/src/arch/sparc/faults.hh index 9f595a28b..b279f4911 100644 --- a/src/arch/sparc/faults.hh +++ b/src/arch/sparc/faults.hh @@ -83,6 +83,31 @@ class MemAddressNotAligned : public SparcFault bool isAlignmentFault() {return true;} }; +#if !FULL_SYSTEM +class PageTableFault : public SparcFault +{ + private: + Addr vaddr; + static FaultName _name; + static TrapType _trapType; + static FaultPriority _priority; + static FaultStat _count; + public: + PageTableFault(Addr va) + : vaddr(va) {} + FaultName name() {return _name;} + TrapType trapType() {return _trapType;} + FaultPriority priority() {return _priority;} + FaultStat & countStat() {return _count;} + void invoke(ThreadContext * tc); +}; + +static inline Fault genPageTableFault(Addr va) +{ + return new PageTableFault(va); +} +#endif + static inline Fault genMachineCheckFault() { return new InternalProcessorError; @@ -589,6 +614,7 @@ class TrapInstruction : public EnumeratedFault #endif }; + } // SparcISA namespace #endif // __FAULTS_HH__ diff --git a/src/arch/sparc/system.cc b/src/arch/sparc/system.cc index e197e7918..63cbbe057 100644 --- a/src/arch/sparc/system.cc +++ b/src/arch/sparc/system.cc @@ -141,6 +141,7 @@ SparcSystem::unserialize(Checkpoint *cp, const std::string §ion) BEGIN_DECLARE_SIM_OBJECT_PARAMS(SparcSystem) SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<std::string> kernel; Param<std::string> reset_bin; @@ -161,6 +162,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SparcSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(reset_bin, "file that contains the reset code"), INIT_PARAM(hypervisor_bin, "file that contains the hypervisor code"), @@ -183,6 +186,7 @@ CREATE_SIM_OBJECT(SparcSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->reset_bin = reset_bin; p->hypervisor_bin = hypervisor_bin; diff --git a/src/base/fast_alloc.cc b/src/base/fast_alloc.cc index 455fb8ed7..610dff66c 100644 --- a/src/base/fast_alloc.cc +++ b/src/base/fast_alloc.cc @@ -180,13 +180,11 @@ FastAlloc::dump_oldest(int n) // C interfaces to FastAlloc::dump_summary() and FastAlloc::dump_oldest(). // gdb seems to have trouble with calling C++ functions directly. // -extern "C" void fast_alloc_summary() { FastAlloc::dump_summary(); } -extern "C" void fast_alloc_oldest(int n) { FastAlloc::dump_oldest(n); diff --git a/src/base/timebuf.hh b/src/base/timebuf.hh index 6a326d25a..a484a3179 100644 --- a/src/base/timebuf.hh +++ b/src/base/timebuf.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Kevin Lim + * Authors: Nathan Binkert + * Kevin Lim */ #ifndef __BASE_TIMEBUF_HH__ @@ -214,6 +215,11 @@ class TimeBuffer { return wire(this, 0); } + + int getSize() + { + return size; + } }; #endif // __BASE_TIMEBUF_HH__ diff --git a/src/base/trace.cc b/src/base/trace.cc index 50426b992..9fa615f4d 100644 --- a/src/base/trace.cc +++ b/src/base/trace.cc @@ -247,7 +247,6 @@ DebugOut() // // Dump trace buffer to specified file (cout if NULL) // -extern "C" void dumpTrace(const char *filename) { @@ -269,7 +268,6 @@ dumpTrace(const char *filename) // same facility as the "trace to file" feature, and will print error // messages rather than clobbering an existing ostream pointer. // -extern "C" void echoTrace(bool on) { @@ -289,7 +287,6 @@ echoTrace(bool on) } } -extern "C" void printTraceFlags() { @@ -338,14 +335,12 @@ tweakTraceFlag(const char *string, bool value) cprintf("could not find flag %s\n", string); } -extern "C" void setTraceFlag(const char *string) { tweakTraceFlag(string, true); } -extern "C" void clearTraceFlag(const char *string) { diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 7ff68bcaf..27c24107c 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -48,8 +48,10 @@ ccfilename = sys.argv[1] + '.cc' # To define a new flag, simply add it to this list. # baseFlags = [ + 'Activity', 'AlphaConsole', 'BADADDR', + 'BE', 'BPredRAS', 'Bus', 'BusAddrRanges', @@ -84,6 +86,7 @@ baseFlags = [ 'EthernetPIO', 'EthernetSM', 'Event', + 'FE', 'Fault', 'Fetch', 'Flow', @@ -97,6 +100,7 @@ baseFlags = [ 'GDBSend', 'GDBWrite', 'HWPrefetch', + 'IBE', 'IEW', 'IIC', 'IICMore', @@ -115,10 +119,8 @@ baseFlags = [ 'MSHR', 'Mbox', 'MemDepUnit', + 'O3CPU', 'OzoneCPU', - 'FE', - 'IBE', - 'BE', 'OzoneLSQ', 'PCEvent', 'PCIA', @@ -132,6 +134,7 @@ baseFlags = [ 'RenameMap', 'SQL', 'Sampler', + 'Scoreboard', 'ScsiCtrl', 'ScsiDisk', 'ScsiNone', @@ -155,8 +158,6 @@ baseFlags = [ 'Uart', 'VtoPhys', 'WriteBarrier', - 'Activity', - 'Scoreboard', 'Writeback', ] @@ -175,7 +176,7 @@ compoundFlagMap = { 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'], + 'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'O3CPU', 'Activity','Scoreboard','Writeback'], 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'] } diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 34bad132c..bc4ec7923 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -68,6 +68,13 @@ mem_comp_sig_template = ''' virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; }; ''' +# Generate a temporary CPU list, including the CheckerCPU if +# it's enabled. This isn't used for anything else other than StaticInst +# headers. +temp_cpu_list = env['CPU_MODELS'] +if env['USE_CHECKER']: + temp_cpu_list.append('CheckerCPU') + # Generate header. def gen_cpu_exec_signatures(target, source, env): f = open(str(target[0]), 'w') @@ -75,7 +82,7 @@ def gen_cpu_exec_signatures(target, source, env): #ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ #define __CPU_STATIC_INST_EXEC_SIGS_HH__ ''' - for cpu in env['CPU_MODELS']: + for cpu in temp_cpu_list: xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] print >> f, exec_sig_template % (xc_type, xc_type, xc_type) print >> f, ''' @@ -85,12 +92,19 @@ def gen_cpu_exec_signatures(target, source, env): # Generate string that gets printed when header is rebuilt def gen_sigs_string(target, source, env): return "Generating static_inst_exec_sigs.hh: " \ - + ', '.join(env['CPU_MODELS']) + + ', '.join(temp_cpu_list) # Add command to generate header to environment. env.Command('static_inst_exec_sigs.hh', models_db, Action(gen_cpu_exec_signatures, gen_sigs_string, - varlist = ['CPU_MODELS'])) + varlist = temp_cpu_list)) + +env.Depends('static_inst_exec_sigs.hh', Value(env['USE_CHECKER'])) +env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS'])) + +# List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True +# and one of these are not being used. +CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU'] ################################################################# # @@ -116,15 +130,13 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') -if 'AlphaFullCPU' in env['CPU_MODELS']: +need_bp_unit = False +if 'O3CPU' in env['CPU_MODELS']: + need_bp_unit = True + sources += SConscript('o3/SConscript', exports = 'env') sources += Split(''' - base_dyn_inst.cc - o3/2bit_local_pred.cc - o3/alpha_dyn_inst.cc - o3/alpha_cpu.cc - o3/alpha_cpu_builder.cc + o3/base_dyn_inst.cc o3/bpred_unit.cc - o3/btb.cc o3/commit.cc o3/decode.cc o3/fetch.cc @@ -136,40 +148,54 @@ if 'AlphaFullCPU' in env['CPU_MODELS']: o3/lsq_unit.cc o3/lsq.cc o3/mem_dep_unit.cc - o3/ras.cc o3/rename.cc o3/rename_map.cc o3/rob.cc o3/scoreboard.cc o3/store_set.cc - o3/tournament_pred.cc ''') + if env['USE_CHECKER']: + sources += Split('o3/checker_builder.cc') -if 'OzoneSimpleCPU' in env['CPU_MODELS']: +if 'OzoneCPU' in env['CPU_MODELS']: + need_bp_unit = True sources += Split(''' + ozone/base_dyn_inst.cc + ozone/bpred_unit.cc ozone/cpu.cc ozone/cpu_builder.cc ozone/dyn_inst.cc ozone/front_end.cc - ozone/inorder_back_end.cc - ozone/inst_queue.cc - ozone/rename_table.cc - ''') - -if 'OzoneCPU' in env['CPU_MODELS']: - sources += Split(''' - ozone/lsq_unit.cc ozone/lw_back_end.cc ozone/lw_lsq.cc + ozone/rename_table.cc ''') + if env['USE_CHECKER']: + sources += Split('ozone/checker_builder.cc') -if 'CheckerCPU' in env['CPU_MODELS']: +if need_bp_unit: sources += Split(''' - checker/cpu.cc - checker/o3_cpu_builder.cc + o3/2bit_local_pred.cc + o3/btb.cc + o3/ras.cc + o3/tournament_pred.cc ''') -# FullCPU sources are included from m5/SConscript since they're not +if env['USE_CHECKER']: + sources += Split('checker/cpu.cc') + checker_supports = False + for i in CheckerSupportedCPUList: + if i in env['CPU_MODELS']: + checker_supports = True + if not checker_supports: + print "Checker only supports CPU models", + for i in CheckerSupportedCPUList: + print i, + print ", please set USE_CHECKER=False or use one of those CPU models" + Exit(1) + + +# FullCPU sources are included from src/SConscript since they're not # below this point in the file hierarchy. # Convert file names to SCons File objects. This takes care of the diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 55c04c498..ce440aeff 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,7 +41,6 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" @@ -60,11 +59,11 @@ int maxThreadsPerCPU = 1; #if FULL_SYSTEM BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), checkInterrupts(true), + : MemObject(p->name), clock(p->clock), checkInterrupts(true), params(p), number_of_threads(p->numberOfThreads), system(p->system) #else BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), params(p), + : MemObject(p->name), clock(p->clock), params(p), number_of_threads(p->numberOfThreads), system(p->system) #endif { @@ -237,7 +236,7 @@ BaseCPU::registerThreadContexts() void -BaseCPU::switchOut(Sampler *sampler) +BaseCPU::switchOut() { panic("This CPU doesn't support sampling!"); } diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 43122f238..2be6e4e81 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -36,17 +36,17 @@ #include "base/statistics.hh" #include "config/full_system.hh" -#include "cpu/sampler/sampler.hh" #include "sim/eventq.hh" -#include "sim/sim_object.hh" +#include "mem/mem_object.hh" #include "arch/isa_traits.hh" class BranchPred; class CheckerCPU; class ThreadContext; class System; +class Port; -class BaseCPU : public SimObject +class BaseCPU : public MemObject { protected: // CPU's clock period in terms of the number of ticks of curTime. @@ -148,7 +148,7 @@ class BaseCPU : public SimObject /// Prepare for another CPU to take over execution. When it is /// is ready (drained pipe) it signals the sampler. - virtual void switchOut(Sampler *); + virtual void switchOut(); /// Take over execution from the given CPU. Used for warm-up and /// sampling. diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 948ee058a..9cc61f74c 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,7 @@ #ifndef __CPU_BASE_DYN_INST_HH__ #define __CPU_BASE_DYN_INST_HH__ +#include <bitset> #include <list> #include <string> @@ -44,12 +45,6 @@ #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "sim/system.hh" -/* -#include "encumbered/cpu/full/bpred_update.hh" -#include "encumbered/cpu/full/spec_memory.hh" -#include "encumbered/cpu/full/spec_state.hh" -#include "encumbered/mem/functional/main.hh" -*/ /** * @file @@ -64,8 +59,8 @@ class BaseDynInst : public FastAlloc, public RefCounted { public: // Typedef for the CPU. - typedef typename Impl::FullCPU FullCPU; - typedef typename FullCPU::ImplState ImplState; + typedef typename Impl::CPUType ImplCPU; + typedef typename ImplCPU::ImplState ImplState; // Binary machine instruction type. typedef TheISA::MachInst MachInst; @@ -132,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The sequence number of the instruction. */ InstSeqNum seqNum; - /** Is the instruction in the IQ */ - bool iqEntry; - - /** Is the instruction in the ROB */ - bool robEntry; - - /** Is the instruction in the LSQ */ - bool lsqEntry; - - /** Is the instruction completed. */ - bool completed; - - /** Is the instruction's result ready. */ - bool resultReady; - - /** Can this instruction issue. */ - bool canIssue; - - /** Has this instruction issued. */ - bool issued; - - /** Has this instruction executed (or made it through execute) yet. */ - bool executed; - - /** Can this instruction commit. */ - bool canCommit; - - /** Is this instruction committed. */ - bool committed; - - /** Is this instruction squashed. */ - bool squashed; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInIQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInLSQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInROB; - - /** Is this a recover instruction. */ - bool recoverInst; - - /** Is this a thread blocking instruction. */ - bool blockingInst; /* this inst has called thread_block() */ + enum Status { + IqEntry, /// Instruction is in the IQ + RobEntry, /// Instruction is in the ROB + LsqEntry, /// Instruction is in the LSQ + Completed, /// Instruction has completed + ResultReady, /// Instruction has its result + CanIssue, /// Instruction can issue and execute + Issued, /// Instruction has issued + Executed, /// Instruction has executed + CanCommit, /// Instruction can commit + AtCommit, /// Instruction has reached commit + Committed, /// Instruction has committed + Squashed, /// Instruction is squashed + SquashedInIQ, /// Instruction is squashed in the IQ + SquashedInLSQ, /// Instruction is squashed in the LSQ + SquashedInROB, /// Instruction is squashed in the ROB + RecoverInst, /// Is a recover instruction + BlockingInst, /// Is a blocking instruction + ThreadsyncWait, /// Is a thread synchronization instruction + SerializeBefore, /// Needs to serialize on + /// instructions ahead of it + SerializeAfter, /// Needs to serialize instructions behind it + SerializeHandled, /// Serialization has been handled + NumStatus + }; - /** Is this a thread syncrhonization instruction. */ - bool threadsyncWait; + /** The status of this BaseDynInst. Several bits can be set. */ + std::bitset<NumStatus> status; /** The thread this instruction is from. */ short threadNumber; @@ -192,8 +165,8 @@ class BaseDynInst : public FastAlloc, public RefCounted /** How many source registers are ready. */ unsigned readyRegs; - /** Pointer to the FullCPU object. */ - FullCPU *cpu; + /** Pointer to the Impl's CPU object. */ + ImplCPU *cpu; /** Pointer to the thread state. */ ImplState *thread; @@ -202,10 +175,9 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault fault; /** The memory request. */ -// MemReqPtr req; Request *req; -// Packet pkt; + /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ @@ -223,12 +195,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The memory request flags (from translation). */ unsigned memReqFlags; - /** The size of the data to be stored. */ - int storeSize; - - /** The data to be stored. */ - IntReg storeData; - union Result { uint64_t integer; float fp; @@ -273,7 +239,7 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param cpu Pointer to the instruction's CPU. */ BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. * @param _staticInst The StaticInst for this BaseDynInst. @@ -288,21 +254,6 @@ class BaseDynInst : public FastAlloc, public RefCounted void initVars(); public: - /** - * @todo: Make this function work; currently it is a dummy function. - * @param fault Last fault. - * @param cmd Last command. - * @param addr Virtual address of access. - * @param p Memory accessed. - * @param nbytes Access size. - */ -// void -// trace_mem(Fault fault, -// MemCmd cmd, -// Addr addr, -// void *p, -// int nbytes); - /** Dumps out contents of this BaseDynInst. */ void dump(); @@ -360,9 +311,9 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const - { return staticInst->isSerializeBefore() || serializeBefore; } + { return staticInst->isSerializeBefore() || status[SerializeBefore]; } bool isSerializeAfter() const - { return staticInst->isSerializeAfter() || serializeAfter; } + { return staticInst->isSerializeAfter() || status[SerializeAfter]; } bool isMemBarrier() const { return staticInst->isMemBarrier(); } bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } @@ -371,41 +322,32 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isUnverifiable() const { return staticInst->isUnverifiable(); } /** Temporarily sets this instruction as a serialize before instruction. */ - void setSerializeBefore() { serializeBefore = true; } + void setSerializeBefore() { status.set(SerializeBefore); } /** Clears the serializeBefore part of this instruction. */ - void clearSerializeBefore() { serializeBefore = false; } + void clearSerializeBefore() { status.reset(SerializeBefore); } /** Checks if this serializeBefore is only temporarily set. */ - bool isTempSerializeBefore() { return serializeBefore; } - - /** Tracks if instruction has been externally set as serializeBefore. */ - bool serializeBefore; + bool isTempSerializeBefore() { return status[SerializeBefore]; } /** Temporarily sets this instruction as a serialize after instruction. */ - void setSerializeAfter() { serializeAfter = true; } + void setSerializeAfter() { status.set(SerializeAfter); } /** Clears the serializeAfter part of this instruction.*/ - void clearSerializeAfter() { serializeAfter = false; } + void clearSerializeAfter() { status.reset(SerializeAfter); } /** Checks if this serializeAfter is only temporarily set. */ - bool isTempSerializeAfter() { return serializeAfter; } + bool isTempSerializeAfter() { return status[SerializeAfter]; } - /** Tracks if instruction has been externally set as serializeAfter. */ - bool serializeAfter; + /** Sets the serialization part of this instruction as handled. */ + void setSerializeHandled() { status.set(SerializeHandled); } /** Checks if the serialization part of this instruction has been * handled. This does not apply to the temporary serializing * state; it only applies to this instruction's own permanent * serializing state. */ - bool isSerializeHandled() { return serializeHandled; } - - /** Sets the serialization part of this instruction as handled. */ - void setSerializeHandled() { serializeHandled = true; } - - /** Whether or not the serialization of this instruction has been handled. */ - bool serializeHandled; + bool isSerializeHandled() { return status[SerializeHandled]; } /** Returns the opclass of this instruction. */ OpClass opClass() const { return staticInst->opClass(); } @@ -439,11 +381,13 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the result of a floating point (double) instruction. */ double readDoubleResult() { return instResult.dbl; } + /** Records an integer register being set to a value. */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) { if (width == 32) @@ -454,16 +398,19 @@ class BaseDynInst : public FastAlloc, public RefCounted panic("Unsupported width!"); } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val) { instResult.fp = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val, int width) { instResult.integer = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; @@ -482,106 +429,112 @@ class BaseDynInst : public FastAlloc, public RefCounted } /** Sets this instruction as completed. */ - void setCompleted() { completed = true; } + void setCompleted() { status.set(Completed); } /** Returns whether or not this instruction is completed. */ - bool isCompleted() const { return completed; } + bool isCompleted() const { return status[Completed]; } - void setResultReady() { resultReady = true; } + /** Marks the result as ready. */ + void setResultReady() { status.set(ResultReady); } - bool isResultReady() const { return resultReady; } + /** Returns whether or not the result is ready. */ + bool isResultReady() const { return status[ResultReady]; } /** Sets this instruction as ready to issue. */ - void setCanIssue() { canIssue = true; } + void setCanIssue() { status.set(CanIssue); } /** Returns whether or not this instruction is ready to issue. */ - bool readyToIssue() const { return canIssue; } + bool readyToIssue() const { return status[CanIssue]; } /** Sets this instruction as issued from the IQ. */ - void setIssued() { issued = true; } + void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ - bool isIssued() const { return issued; } + bool isIssued() const { return status[Issued]; } /** Sets this instruction as executed. */ - void setExecuted() { executed = true; } + void setExecuted() { status.set(Executed); } /** Returns whether or not this instruction has executed. */ - bool isExecuted() const { return executed; } + bool isExecuted() const { return status[Executed]; } /** Sets this instruction as ready to commit. */ - void setCanCommit() { canCommit = true; } + void setCanCommit() { status.set(CanCommit); } /** Clears this instruction as being ready to commit. */ - void clearCanCommit() { canCommit = false; } + void clearCanCommit() { status.reset(CanCommit); } /** Returns whether or not this instruction is ready to commit. */ - bool readyToCommit() const { return canCommit; } + bool readyToCommit() const { return status[CanCommit]; } + + void setAtCommit() { status.set(AtCommit); } + + bool isAtCommit() { return status[AtCommit]; } /** Sets this instruction as committed. */ - void setCommitted() { committed = true; } + void setCommitted() { status.set(Committed); } /** Returns whether or not this instruction is committed. */ - bool isCommitted() const { return committed; } + bool isCommitted() const { return status[Committed]; } /** Sets this instruction as squashed. */ - void setSquashed() { squashed = true; } + void setSquashed() { status.set(Squashed); } /** Returns whether or not this instruction is squashed. */ - bool isSquashed() const { return squashed; } + bool isSquashed() const { return status[Squashed]; } //Instruction Queue Entry //----------------------- /** Sets this instruction as a entry the IQ. */ - void setInIQ() { iqEntry = true; } + void setInIQ() { status.set(IqEntry); } /** Sets this instruction as a entry the IQ. */ - void removeInIQ() { iqEntry = false; } + void clearInIQ() { status.reset(IqEntry); } + + /** Returns whether or not this instruction has issued. */ + bool isInIQ() const { return status[IqEntry]; } /** Sets this instruction as squashed in the IQ. */ - void setSquashedInIQ() { squashedInIQ = true; squashed = true;} + void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);} /** Returns whether or not this instruction is squashed in the IQ. */ - bool isSquashedInIQ() const { return squashedInIQ; } - - /** Returns whether or not this instruction has issued. */ - bool isInIQ() const { return iqEntry; } + bool isSquashedInIQ() const { return status[SquashedInIQ]; } //Load / Store Queue Functions //----------------------- /** Sets this instruction as a entry the LSQ. */ - void setInLSQ() { lsqEntry = true; } + void setInLSQ() { status.set(LsqEntry); } /** Sets this instruction as a entry the LSQ. */ - void removeInLSQ() { lsqEntry = false; } + void removeInLSQ() { status.reset(LsqEntry); } + + /** Returns whether or not this instruction is in the LSQ. */ + bool isInLSQ() const { return status[LsqEntry]; } /** Sets this instruction as squashed in the LSQ. */ - void setSquashedInLSQ() { squashedInLSQ = true;} + void setSquashedInLSQ() { status.set(SquashedInLSQ);} /** Returns whether or not this instruction is squashed in the LSQ. */ - bool isSquashedInLSQ() const { return squashedInLSQ; } - - /** Returns whether or not this instruction is in the LSQ. */ - bool isInLSQ() const { return lsqEntry; } + bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } //Reorder Buffer Functions //----------------------- /** Sets this instruction as a entry the ROB. */ - void setInROB() { robEntry = true; } + void setInROB() { status.set(RobEntry); } /** Sets this instruction as a entry the ROB. */ - void removeInROB() { robEntry = false; } + void clearInROB() { status.reset(RobEntry); } + + /** Returns whether or not this instruction is in the ROB. */ + bool isInROB() const { return status[RobEntry]; } /** Sets this instruction as squashed in the ROB. */ - void setSquashedInROB() { squashedInROB = true; } + void setSquashedInROB() { status.set(SquashedInROB); } /** Returns whether or not this instruction is squashed in the ROB. */ - bool isSquashedInROB() const { return squashedInROB; } - - /** Returns whether or not this instruction is in the ROB. */ - bool isInROB() const { return robEntry; } + bool isSquashedInROB() const { return status[SquashedInROB]; } /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } @@ -590,17 +543,18 @@ class BaseDynInst : public FastAlloc, public RefCounted void setNextPC(uint64_t val) { nextPC = val; -// instResult.integer = val; } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } - void setThread(unsigned tid) { threadNumber = tid; } + /** Sets the thread id. */ + void setTid(unsigned tid) { threadNumber = tid; } - void setState(ImplState *state) { thread = state; } + /** Sets the pointer to the thread state. */ + void setThreadState(ImplState *state) { thread = state; } - /** Returns the thread context. - */ + /** Returns the thread context. */ ThreadContext *tcBase() { return thread->getTC(); } private: @@ -637,8 +591,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Store queue index. */ int16_t sqIdx; - bool reachedCommit; - /** Iterator pointing to this BaseDynInst in the list of all insts. */ ListIt instListIt; diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst_impl.hh index 30fa10a6b..91424faad 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,10 +41,6 @@ #include "mem/request.hh" #include "cpu/base_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu.hh" -//#include "cpu/ozone/simple_impl.hh" -//#include "cpu/ozone/ozone_impl.hh" using namespace std; using namespace TheISA; @@ -71,8 +67,8 @@ my_hash_t thishash; template <class Impl> BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, - FullCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu)/*, xc(cpu->xcBase())*/ + ImplCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu) { seqNum = seq_num; @@ -99,37 +95,18 @@ BaseDynInst<Impl>::initVars() memData = NULL; effAddr = 0; physEffAddr = 0; - storeSize = 0; readyRegs = 0; - // May want to turn this into a bit vector or something. - completed = false; - resultReady = false; - canIssue = false; - issued = false; - executed = false; - canCommit = false; - committed = false; - squashed = false; - squashedInIQ = false; - squashedInLSQ = false; - squashedInROB = false; + instResult.integer = 0; + + status.reset(); + eaCalcDone = false; memOpDone = false; + lqIdx = -1; sqIdx = -1; - reachedCommit = false; - - blockingInst = false; - recoverInst = false; - - iqEntry = false; - robEntry = false; - - serializeBefore = false; - serializeAfter = false; - serializeHandled = false; // Eventually make this a parameter. threadNumber = 0; @@ -242,31 +219,7 @@ template <class Impl> void BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags) { - // Need to create a MemReq here so we can do a translation. This - // will casue a TLB miss trap if necessary... not sure whether - // that's the best thing to do or not. We don't really need the - // MemReq otherwise, since wh64 has no functional effect. -/* - MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags); - req->asid = asid; - - fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault && !(req->flags & UNCACHEABLE)) { - // Record key MemReq parameters so we can generate another one - // just like it for the timing access without calling translate() - // again (which might mess up the TLB). - effAddr = req->vaddr; - physEffAddr = req->paddr; - memReqFlags = req->flags; - } else { - // ignore faults & accesses to uncacheable space... treat as no-op - effAddr = physEffAddr = MemReq::inval_addr; - } - - storeSize = size; - storeData = 0; -*/ + // Not currently supported. } /** @@ -276,22 +229,7 @@ template <class Impl> Fault BaseDynInst<Impl>::copySrcTranslate(Addr src) { -/* - MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataReadReq(req); - - if (fault == NoFault) { - thread->copySrcAddr = src; - thread->copySrcPhysAddr = req->paddr; - } else { - thread->copySrcAddr = 0; - thread->copySrcPhysAddr = 0; - } - return fault; -*/ + // Not currently supported. return NoFault; } @@ -302,26 +240,7 @@ template <class Impl> Fault BaseDynInst<Impl>::copy(Addr dest) { -/* - uint8_t data[64]; - FunctionalMemory *mem = thread->mem; - assert(thread->copySrcPhysAddr); - MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault) { - Addr dest_addr = req->paddr; - // Need to read straight from memory since we have more than 8 bytes. - req->paddr = thread->copySrcPhysAddr; - mem->read(req, data); - req->paddr = dest_addr; - mem->write(req, data); - } - return fault; -*/ + // Not currently supported. return NoFault; } @@ -350,7 +269,7 @@ void BaseDynInst<Impl>::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - canIssue = true; + status.set(CanIssue); } } @@ -358,13 +277,9 @@ template <class Impl> void BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx) { - ++readyRegs; - _readySrcRegIdx[src_idx] = true; - if (readyRegs == numSrcRegs()) { - canIssue = true; - } + markSrcRegReady(); } template <class Impl> @@ -382,25 +297,3 @@ BaseDynInst<Impl>::eaSrcsReady() return true; } - -// Forward declaration -template class BaseDynInst<AlphaSimpleImpl>; - -template <> -int -BaseDynInst<AlphaSimpleImpl>::instcount = 0; -/* -// Forward declaration -template class BaseDynInst<SimpleImpl>; - -template <> -int -BaseDynInst<SimpleImpl>::instcount = 0; - -// Forward declaration -template class BaseDynInst<OzoneImpl>; - -template <> -int -BaseDynInst<OzoneImpl>::instcount = 0; -*/ diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index ebc02f7be..1540a6b94 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -31,27 +31,17 @@ #include <list> #include <string> -#include "base/refcnt.hh" #include "cpu/base.hh" -#include "cpu/base_dyn_inst.hh" #include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" #include "cpu/static_inst.hh" +#include "mem/packet_impl.hh" #include "sim/byteswap.hh" -#include "sim/sim_object.hh" -#include "sim/stats.hh" - -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" - -//#include "cpu/ozone/dyn_inst.hh" -//#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" #if FULL_SYSTEM -#include "sim/system.hh" #include "arch/vtophys.hh" +#include "kern/kernel_stats.hh" #endif // FULL_SYSTEM using namespace std; @@ -77,6 +67,7 @@ CheckerCPU::CheckerCPU(Params *p) changedPC = willChangePC = changedNextPC = false; exitOnError = p->exitOnError; + warnOnlyOnLoadError = p->warnOnlyOnLoadError; #if FULL_SYSTEM itb = p->itb; dtb = p->dtb; @@ -84,6 +75,8 @@ CheckerCPU::CheckerCPU(Params *p) #else process = p->process; #endif + + result.integer = 0; } CheckerCPU::~CheckerCPU() @@ -406,379 +399,10 @@ CheckerCPU::checkFlags(Request *req) } } -template <class DynInstPtr> void -Checker<DynInstPtr>::tick(DynInstPtr &completed_inst) +CheckerCPU::dumpAndExit() { - DynInstPtr inst; - - // Either check this instruction, or add it to a list of - // instructions waiting to be checked. Instructions must be - // checked in program order, so if a store has committed yet not - // completed, there may be some instructions that are waiting - // behind it that have completed and must be checked. - if (!instList.empty()) { - if (youngestSN < completed_inst->seqNum) { - DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", - completed_inst->seqNum, completed_inst->readPC()); - instList.push_back(completed_inst); - youngestSN = completed_inst->seqNum; - } - - if (!instList.front()->isCompleted()) { - return; - } else { - inst = instList.front(); - instList.pop_front(); - } - } else { - if (!completed_inst->isCompleted()) { - if (youngestSN < completed_inst->seqNum) { - DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", - completed_inst->seqNum, completed_inst->readPC()); - instList.push_back(completed_inst); - youngestSN = completed_inst->seqNum; - } - return; - } else { - if (youngestSN < completed_inst->seqNum) { - inst = completed_inst; - youngestSN = completed_inst->seqNum; - } else { - return; - } - } - } - - // Try to check all instructions that are completed, ending if we - // run out of instructions to check or if an instruction is not - // yet completed. - while (1) { - DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n", - inst->seqNum, inst->readPC()); - unverifiedResult.integer = inst->readIntResult(); - unverifiedReq = inst->req; - unverifiedMemData = inst->memData; - numCycles++; - - Fault fault = NoFault; - - // maintain $r0 semantics - thread->setIntReg(ZeroReg, 0); -#ifdef TARGET_ALPHA - thread->setFloatRegDouble(ZeroReg, 0.0); -#endif // TARGET_ALPHA - - // Check if any recent PC changes match up with anything we - // expect to happen. This is mostly to check if traps or - // PC-based events have occurred in both the checker and CPU. - if (changedPC) { - DPRINTF(Checker, "Changed PC recently to %#x\n", - thread->readPC()); - if (willChangePC) { - if (newPC == thread->readPC()) { - DPRINTF(Checker, "Changed PC matches expected PC\n"); - } else { - warn("%lli: Changed PC does not match expected PC, " - "changed: %#x, expected: %#x", - curTick, thread->readPC(), newPC); - handleError(); - } - willChangePC = false; - } - changedPC = false; - } - if (changedNextPC) { - DPRINTF(Checker, "Changed NextPC recently to %#x\n", - thread->readNextPC()); - changedNextPC = false; - } - - // Try to fetch the instruction - -#if FULL_SYSTEM -#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0 -#else -#define IFETCH_FLAGS(pc) 0 -#endif - - uint64_t fetch_PC = thread->readPC() & ~3; - - // set up memory request for instruction fetch - memReq = new Request(inst->threadNumber, fetch_PC, - sizeof(uint32_t), - IFETCH_FLAGS(thread->readPC()), - fetch_PC, thread->readCpuId(), inst->threadNumber); - - bool succeeded = translateInstReq(memReq); - - if (!succeeded) { - if (inst->getFault() == NoFault) { - // In this case the instruction was not a dummy - // instruction carrying an ITB fault. In the single - // threaded case the ITB should still be able to - // translate this instruction; in the SMT case it's - // possible that its ITB entry was kicked out. - warn("%lli: Instruction PC %#x was not found in the ITB!", - curTick, thread->readPC()); - handleError(); - - // go to the next instruction - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); - - return; - } else { - // The instruction is carrying an ITB fault. Handle - // the fault and see if our results match the CPU on - // the next tick(). - fault = inst->getFault(); - } - } - - if (fault == NoFault) { - Packet *pkt = new Packet(memReq, Packet::ReadReq, - Packet::Broadcast); - - pkt->dataStatic(&machInst); - - icachePort->sendFunctional(pkt); - - delete pkt; - - // keep an instruction count - numInst++; - - // decode the instruction - machInst = gtoh(machInst); - // Checks that the instruction matches what we expected it to be. - // Checks both the machine instruction and the PC. - validateInst(inst); - - curStaticInst = StaticInst::decode(makeExtMI(machInst, - thread->readPC())); - -#if FULL_SYSTEM - thread->setInst(machInst); -#endif // FULL_SYSTEM - - fault = inst->getFault(); - } - - // Discard fetch's memReq. - delete memReq; - memReq = NULL; - - // Either the instruction was a fault and we should process the fault, - // or we should just go ahead execute the instruction. This assumes - // that the instruction is properly marked as a fault. - if (fault == NoFault) { - - thread->funcExeInst++; - - fault = curStaticInst->execute(this, NULL); - - // Checks to make sure instrution results are correct. - validateExecution(inst); - - if (curStaticInst->isLoad()) { - ++numLoad; - } - } - - if (fault != NoFault) { -#if FULL_SYSTEM - fault->invoke(tc); - willChangePC = true; - newPC = thread->readPC(); - DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); -#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC 0x%08p", fault, thread->readPC()); -#endif // FULL_SYSTEM - } else { -#if THE_ISA != MIPS_ISA - // go to the next instruction - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); -#else - // go to the next instruction - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); -#endif - - } - -#if FULL_SYSTEM - // @todo: Determine if these should happen only if the - // instruction hasn't faulted. In the SimpleCPU case this may - // not be true, but in the O3 or Ozone case this may be true. - Addr oldpc; - int count = 0; - do { - oldpc = thread->readPC(); - system->pcEventQueue.service(tc); - count++; - } while (oldpc != thread->readPC()); - if (count > 1) { - willChangePC = true; - newPC = thread->readPC(); - DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC); - } -#endif - - // @todo: Optionally can check all registers. (Or just those - // that have been modified). - validateState(); - - if (memReq) { - delete memReq; - memReq = NULL; - } - - // Continue verifying instructions if there's another completed - // instruction waiting to be verified. - if (instList.empty()) { - break; - } else if (instList.front()->isCompleted()) { - inst = instList.front(); - instList.pop_front(); - } else { - break; - } - } + warn("%lli: Checker PC:%#x, next PC:%#x", + curTick, thread->readPC(), thread->readNextPC()); + panic("Checker found an error!"); } - -template <class DynInstPtr> -void -Checker<DynInstPtr>::switchOut(Sampler *s) -{ - instList.clear(); -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU) -{ -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::validateInst(DynInstPtr &inst) -{ - if (inst->readPC() != thread->readPC()) { - warn("%lli: PCs do not match! Inst: %#x, checker: %#x", - curTick, inst->readPC(), thread->readPC()); - if (changedPC) { - warn("%lli: Changed PCs recently, may not be an error", - curTick); - } else { - handleError(); - } - } - - MachInst mi = static_cast<MachInst>(inst->staticInst->machInst); - - if (mi != machInst) { - warn("%lli: Binary instructions do not match! Inst: %#x, " - "checker: %#x", - curTick, mi, machInst); - handleError(); - } -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::validateExecution(DynInstPtr &inst) -{ - if (inst->numDestRegs()) { - // @todo: Support more destination registers. - if (inst->isUnverifiable()) { - // Unverifiable instructions assume they were executed - // properly by the CPU. Grab the result from the - // instruction and write it to the register. - RegIndex idx = inst->destRegIdx(0); - if (idx < TheISA::FP_Base_DepTag) { - thread->setIntReg(idx, inst->readIntResult()); - } else if (idx < TheISA::Fpcr_DepTag) { - thread->setFloatRegBits(idx, inst->readIntResult()); - } else { - thread->setMiscReg(idx, inst->readIntResult()); - } - } else if (result.integer != inst->readIntResult()) { - warn("%lli: Instruction results do not match! (Values may not " - "actually be integers) Inst: %#x, checker: %#x", - curTick, inst->readIntResult(), result.integer); - handleError(); - } - } - - if (inst->readNextPC() != thread->readNextPC()) { - warn("%lli: Instruction next PCs do not match! Inst: %#x, " - "checker: %#x", - curTick, inst->readNextPC(), thread->readNextPC()); - handleError(); - } - - // Checking side effect registers can be difficult if they are not - // checked simultaneously with the execution of the instruction. - // This is because other valid instructions may have modified - // these registers in the meantime, and their values are not - // stored within the DynInst. - while (!miscRegIdxs.empty()) { - int misc_reg_idx = miscRegIdxs.front(); - miscRegIdxs.pop(); - - if (inst->tcBase()->readMiscReg(misc_reg_idx) != - thread->readMiscReg(misc_reg_idx)) { - warn("%lli: Misc reg idx %i (side effect) does not match! " - "Inst: %#x, checker: %#x", - curTick, misc_reg_idx, - inst->tcBase()->readMiscReg(misc_reg_idx), - thread->readMiscReg(misc_reg_idx)); - handleError(); - } - } -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::validateState() -{ -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::dumpInsts() -{ - int num = 0; - - InstListIt inst_list_it = --(instList.end()); - - cprintf("Inst list size: %i\n", instList.size()); - - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\n", - num); - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Completed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isCompleted()); - - cprintf("\n"); - - inst_list_it--; - ++num; - } - -} - -//template -//class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >; - -template -class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >; diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index c9986d228..a508c56ba 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -66,7 +66,6 @@ class ThreadContext; class MemInterface; class Checkpoint; class Request; -class Sampler; /** * CheckerCPU class. Dynamically verifies instructions as they are @@ -103,6 +102,7 @@ class CheckerCPU : public BaseCPU Process *process; #endif bool exitOnError; + bool warnOnlyOnLoadError; }; public: @@ -127,6 +127,12 @@ class CheckerCPU : public BaseCPU Port *dcachePort; + virtual Port *getPort(const std::string &name, int idx) + { + panic("Not supported on checker!"); + return NULL; + } + public: // Primary thread being run. SimpleThread *thread; @@ -335,10 +341,13 @@ class CheckerCPU : public BaseCPU void handleError() { if (exitOnError) - panic("Checker found error!"); + dumpAndExit(); } + bool checkFlags(Request *req); + void dumpAndExit(); + ThreadContext *tcBase() { return tc; } SimpleThread *threadBase() { return thread; } @@ -351,6 +360,7 @@ class CheckerCPU : public BaseCPU uint64_t newPC; bool changedNextPC; bool exitOnError; + bool warnOnlyOnLoadError; InstSeqNum youngestSN; }; @@ -369,15 +379,26 @@ class Checker : public CheckerCPU : CheckerCPU(p) { } - void switchOut(Sampler *s); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); - void tick(DynInstPtr &inst); + void verify(DynInstPtr &inst); void validateInst(DynInstPtr &inst); void validateExecution(DynInstPtr &inst); void validateState(); + void copyResult(DynInstPtr &inst); + + private: + void handleError(DynInstPtr &inst) + { + if (exitOnError) + dumpAndExit(inst); + } + + void dumpAndExit(DynInstPtr &inst); + std::list<DynInstPtr> instList; typedef typename std::list<DynInstPtr>::iterator InstListIt; void dumpInsts(); diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh new file mode 100644 index 000000000..81f97726c --- /dev/null +++ b/src/cpu/checker/cpu_impl.hh @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include <list> +#include <string> + +#include "base/refcnt.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/checker/cpu.hh" +#include "cpu/simple_thread.hh" +#include "cpu/thread_context.hh" +#include "cpu/static_inst.hh" +#include "mem/packet_impl.hh" +#include "sim/byteswap.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "arch/vtophys.hh" +#endif // FULL_SYSTEM + +using namespace std; +//The CheckerCPU does alpha only +using namespace AlphaISA; + +template <class DynInstPtr> +void +Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) +{ + DynInstPtr inst; + + // Either check this instruction, or add it to a list of + // instructions waiting to be checked. Instructions must be + // checked in program order, so if a store has committed yet not + // completed, there may be some instructions that are waiting + // behind it that have completed and must be checked. + if (!instList.empty()) { + if (youngestSN < completed_inst->seqNum) { + DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", + completed_inst->seqNum, completed_inst->readPC()); + instList.push_back(completed_inst); + youngestSN = completed_inst->seqNum; + } + + if (!instList.front()->isCompleted()) { + return; + } else { + inst = instList.front(); + instList.pop_front(); + } + } else { + if (!completed_inst->isCompleted()) { + if (youngestSN < completed_inst->seqNum) { + DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", + completed_inst->seqNum, completed_inst->readPC()); + instList.push_back(completed_inst); + youngestSN = completed_inst->seqNum; + } + return; + } else { + if (youngestSN < completed_inst->seqNum) { + inst = completed_inst; + youngestSN = completed_inst->seqNum; + } else { + return; + } + } + } + + // Try to check all instructions that are completed, ending if we + // run out of instructions to check or if an instruction is not + // yet completed. + while (1) { + DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n", + inst->seqNum, inst->readPC()); + unverifiedResult.integer = inst->readIntResult(); + unverifiedReq = inst->req; + unverifiedMemData = inst->memData; + numCycles++; + + Fault fault = NoFault; + + // maintain $r0 semantics + thread->setIntReg(ZeroReg, 0); +#ifdef TARGET_ALPHA + thread->setFloatRegDouble(ZeroReg, 0.0); +#endif // TARGET_ALPHA + + // Check if any recent PC changes match up with anything we + // expect to happen. This is mostly to check if traps or + // PC-based events have occurred in both the checker and CPU. + if (changedPC) { + DPRINTF(Checker, "Changed PC recently to %#x\n", + thread->readPC()); + if (willChangePC) { + if (newPC == thread->readPC()) { + DPRINTF(Checker, "Changed PC matches expected PC\n"); + } else { + warn("%lli: Changed PC does not match expected PC, " + "changed: %#x, expected: %#x", + curTick, thread->readPC(), newPC); + CheckerCPU::handleError(); + } + willChangePC = false; + } + changedPC = false; + } + if (changedNextPC) { + DPRINTF(Checker, "Changed NextPC recently to %#x\n", + thread->readNextPC()); + changedNextPC = false; + } + + // Try to fetch the instruction + +#if FULL_SYSTEM +#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0 +#else +#define IFETCH_FLAGS(pc) 0 +#endif + + uint64_t fetch_PC = thread->readPC() & ~3; + + // set up memory request for instruction fetch + memReq = new Request(inst->threadNumber, fetch_PC, + sizeof(uint32_t), + IFETCH_FLAGS(thread->readPC()), + fetch_PC, thread->readCpuId(), inst->threadNumber); + + bool succeeded = translateInstReq(memReq); + + if (!succeeded) { + if (inst->getFault() == NoFault) { + // In this case the instruction was not a dummy + // instruction carrying an ITB fault. In the single + // threaded case the ITB should still be able to + // translate this instruction; in the SMT case it's + // possible that its ITB entry was kicked out. + warn("%lli: Instruction PC %#x was not found in the ITB!", + curTick, thread->readPC()); + handleError(inst); + + // go to the next instruction + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); + + return; + } else { + // The instruction is carrying an ITB fault. Handle + // the fault and see if our results match the CPU on + // the next tick(). + fault = inst->getFault(); + } + } + + if (fault == NoFault) { + Packet *pkt = new Packet(memReq, Packet::ReadReq, + Packet::Broadcast); + + pkt->dataStatic(&machInst); + + icachePort->sendFunctional(pkt); + + delete pkt; + + // keep an instruction count + numInst++; + + // decode the instruction + machInst = gtoh(machInst); + // Checks that the instruction matches what we expected it to be. + // Checks both the machine instruction and the PC. + validateInst(inst); + + curStaticInst = StaticInst::decode(makeExtMI(machInst, + thread->readPC())); + +#if FULL_SYSTEM + thread->setInst(machInst); +#endif // FULL_SYSTEM + + fault = inst->getFault(); + } + + // Discard fetch's memReq. + delete memReq; + memReq = NULL; + + // Either the instruction was a fault and we should process the fault, + // or we should just go ahead execute the instruction. This assumes + // that the instruction is properly marked as a fault. + if (fault == NoFault) { + + thread->funcExeInst++; + + fault = curStaticInst->execute(this, NULL); + + // Checks to make sure instrution results are correct. + validateExecution(inst); + + if (curStaticInst->isLoad()) { + ++numLoad; + } + } + + if (fault != NoFault) { +#if FULL_SYSTEM + fault->invoke(tc); + willChangePC = true; + newPC = thread->readPC(); + DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); +#endif + } else { +#if THE_ISA != MIPS_ISA + // go to the next instruction + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); +#else + // go to the next instruction + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); +#endif + + } + +#if FULL_SYSTEM + // @todo: Determine if these should happen only if the + // instruction hasn't faulted. In the SimpleCPU case this may + // not be true, but in the O3 or Ozone case this may be true. + Addr oldpc; + int count = 0; + do { + oldpc = thread->readPC(); + system->pcEventQueue.service(tc); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + willChangePC = true; + newPC = thread->readPC(); + DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC); + } +#endif + + // @todo: Optionally can check all registers. (Or just those + // that have been modified). + validateState(); + + if (memReq) { + delete memReq; + memReq = NULL; + } + + // Continue verifying instructions if there's another completed + // instruction waiting to be verified. + if (instList.empty()) { + break; + } else if (instList.front()->isCompleted()) { + inst = instList.front(); + instList.pop_front(); + } else { + break; + } + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::switchOut() +{ + instList.clear(); +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU) +{ +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::validateInst(DynInstPtr &inst) +{ + if (inst->readPC() != thread->readPC()) { + warn("%lli: PCs do not match! Inst: %#x, checker: %#x", + curTick, inst->readPC(), thread->readPC()); + if (changedPC) { + warn("%lli: Changed PCs recently, may not be an error", + curTick); + } else { + handleError(inst); + } + } + + MachInst mi = static_cast<MachInst>(inst->staticInst->machInst); + + if (mi != machInst) { + warn("%lli: Binary instructions do not match! Inst: %#x, " + "checker: %#x", + curTick, mi, machInst); + handleError(inst); + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::validateExecution(DynInstPtr &inst) +{ + bool result_mismatch = false; + if (inst->numDestRegs()) { + // @todo: Support more destination registers. + if (inst->isUnverifiable()) { + // Unverifiable instructions assume they were executed + // properly by the CPU. Grab the result from the + // instruction and write it to the register. + copyResult(inst); + } else if (result.integer != inst->readIntResult()) { + result_mismatch = true; + } + } + + if (result_mismatch) { + warn("%lli: Instruction results do not match! (Values may not " + "actually be integers) Inst: %#x, checker: %#x", + curTick, inst->readIntResult(), result.integer); + + // It's useful to verify load values from memory, but in MP + // systems the value obtained at execute may be different than + // the value obtained at completion. Similarly DMA can + // present the same problem on even UP systems. Thus there is + // the option to only warn on loads having a result error. + if (inst->isLoad() && warnOnlyOnLoadError) { + copyResult(inst); + } else { + handleError(inst); + } + } + + if (inst->readNextPC() != thread->readNextPC()) { + warn("%lli: Instruction next PCs do not match! Inst: %#x, " + "checker: %#x", + curTick, inst->readNextPC(), thread->readNextPC()); + handleError(inst); + } + + // Checking side effect registers can be difficult if they are not + // checked simultaneously with the execution of the instruction. + // This is because other valid instructions may have modified + // these registers in the meantime, and their values are not + // stored within the DynInst. + while (!miscRegIdxs.empty()) { + int misc_reg_idx = miscRegIdxs.front(); + miscRegIdxs.pop(); + + if (inst->tcBase()->readMiscReg(misc_reg_idx) != + thread->readMiscReg(misc_reg_idx)) { + warn("%lli: Misc reg idx %i (side effect) does not match! " + "Inst: %#x, checker: %#x", + curTick, misc_reg_idx, + inst->tcBase()->readMiscReg(misc_reg_idx), + thread->readMiscReg(misc_reg_idx)); + handleError(inst); + } + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::validateState() +{ +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::copyResult(DynInstPtr &inst) +{ + RegIndex idx = inst->destRegIdx(0); + if (idx < TheISA::FP_Base_DepTag) { + thread->setIntReg(idx, inst->readIntResult()); + } else if (idx < TheISA::Fpcr_DepTag) { + thread->setFloatRegBits(idx, inst->readIntResult()); + } else { + thread->setMiscReg(idx, inst->readIntResult()); + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::dumpAndExit(DynInstPtr &inst) +{ + cprintf("Error detected, instruction information:\n"); + cprintf("PC:%#x, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Completed:%i\n", + inst->readPC(), + inst->readNextPC(), + inst->seqNum, + inst->threadNumber, + inst->isCompleted()); + inst->dump(); + CheckerCPU::dumpAndExit(); +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::dumpInsts() +{ + int num = 0; + + InstListIt inst_list_it = --(instList.end()); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Completed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isCompleted()); + + cprintf("\n"); + + inst_list_it--; + ++num; + } + +} diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index c0ac8f01d..c035e92ac 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -120,7 +120,7 @@ class CheckerThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(delay); } /// Set the status to Halted. void halt() { actualTC->halt(); } diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index 1a9724ca6..5b0c6c4da 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -26,6 +26,10 @@ # # Authors: Steve Reinhardt +import os +import os.path +import sys + ################ # CpuModel class # @@ -47,7 +51,6 @@ class CpuModel: # Add self to dict CpuModel.dict[name] = self - # # Define CPU models. # @@ -67,9 +70,6 @@ CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "encumbered/cpu/full/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) -CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc', - '#include "cpu/o3/alpha_dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' }) CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc', '#include "cpu/ozone/dyn_inst.hh"', { 'CPU_exec_context': 'OzoneDynInst<SimpleImpl>' }) @@ -79,4 +79,6 @@ CpuModel('OzoneCPU', 'ozone_exec.cc', CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) - +CpuModel('O3CPU', 'o3_cpu_exec.cc', + '#include "cpu/o3/isa_specific.hh"', + { 'CPU_exec_context': 'O3DynInst' }) diff --git a/src/cpu/cpuevent.hh b/src/cpu/cpuevent.hh index 11ac7aafb..9dfae27cf 100644 --- a/src/cpu/cpuevent.hh +++ b/src/cpu/cpuevent.hh @@ -36,7 +36,7 @@ class ThreadContext; -/** This class creates a global list of events than need a pointer to an +/** This class creates a global list of events that need a pointer to a * thread context. When a switchover takes place the events can be migrated * to the new thread context, otherwise you could have a wake timer interrupt * go off on a switched out cpu or other unfortunate events. This object MUST be diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh new file mode 100644 index 000000000..f6e8d7c25 --- /dev/null +++ b/src/cpu/exec_context.hh @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#error "Cannot include this file" + +/** + * The ExecContext is not a usable class. It is simply here for + * documentation purposes. It shows the interface that is used by the + * ISA to access and change CPU state. + */ +class ExecContext { + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + /** Reads an integer register. */ + uint64_t readIntReg(const StaticInst *si, int idx); + + /** Reads a floating point register of a specific width. */ + FloatReg readFloatReg(const StaticInst *si, int idx, int width); + + /** Reads a floating point register of single register width. */ + FloatReg readFloatReg(const StaticInst *si, int idx); + + /** Reads a floating point register of a specific width in its + * binary format, instead of by value. */ + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width); + + /** Reads a floating point register in its binary format, instead + * of by value. */ + FloatRegBits readFloatRegBits(const StaticInst *si, int idx); + + /** Sets an integer register to a value. */ + void setIntReg(const StaticInst *si, int idx, uint64_t val); + + /** Sets a floating point register of a specific width to a value. */ + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width); + + /** Sets a floating point register of single width to a value. */ + void setFloatReg(const StaticInst *si, int idx, FloatReg val); + + /** Sets the bits of a floating point register of a specific width + * to a binary value. */ + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width); + + /** Sets the bits of a floating point register of single width + * to a binary value. */ + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val); + + /** Reads the PC. */ + uint64_t readPC(); + /** Reads the NextPC. */ + uint64_t readNextPC(); + /** Reads the Next-NextPC. Only for architectures like SPARC or MIPS. */ + uint64_t readNextNPC(); + + /** Sets the PC. */ + void setPC(uint64_t val); + /** Sets the NextPC. */ + void setNextPC(uint64_t val); + /** Sets the Next-NextPC. Only for architectures like SPARC or MIPS. */ + void setNextNPC(uint64_t val); + + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg); + + /** Reads a miscellaneous register, handling any architectural + * side effects due to reading that register. */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a miscellaneous register, handling any architectural + * side effects due to writing that register. */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Records the effective address of the instruction. Only valid + * for memory ops. */ + void setEA(Addr EA); + /** Returns the effective address of the instruction. Only valid + * for memory ops. */ + Addr getEA(); + + /** Returns a pointer to the ThreadContext. */ + ThreadContext *tcBase(); + + /** Reads an address, creating a memory request with the given + * flags. Stores result of read in data. */ + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + /** Writes to an address, creating a memory request with the given + * flags. Writes data to memory. For store conditionals, returns + * the result of the store in res. */ + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + /** Prefetches an address, creating a memory request with the + * given flags. */ + void prefetch(Addr addr, unsigned flags); + + /** Hints to the memory system that an address will be written to + * soon, with the given size. Creates a memory request with the + * given flags. */ + void writeHint(Addr addr, int size, unsigned flags); + +#if FULL_SYSTEM + /** Somewhat Alpha-specific function that handles returning from + * an error or interrupt. */ + Fault hwrei(); + /** Reads the interrupt flags. */ + int readIntrFlag(); + /** Sets the interrupt flags to a value. */ + void setIntrFlag(int val); + + /** + * Check for special simulator handling of specific PAL calls. If + * return value is false, actual PAL call will be suppressed. + */ + bool simPalCheck(int palFunc); +#else + /** Executes a syscall specified by the callnum. */ + void syscall(int64_t callnum); +#endif +}; diff --git a/src/cpu/func_unit.cc b/src/cpu/func_unit.cc new file mode 100644 index 000000000..c20578a43 --- /dev/null +++ b/src/cpu/func_unit.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2002-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Raasch + */ + +#include <sstream> + +#include "base/misc.hh" +#include "cpu/func_unit.hh" +#include "sim/builder.hh" + +using namespace std; + + +//////////////////////////////////////////////////////////////////////////// +// +// The funciton unit +// +FuncUnit::FuncUnit() +{ + capabilityList.reset(); +} + + +// Copy constructor +FuncUnit::FuncUnit(const FuncUnit &fu) +{ + + for (int i = 0; i < Num_OpClasses; ++i) { + opLatencies[i] = fu.opLatencies[i]; + issueLatencies[i] = fu.issueLatencies[i]; + } + + capabilityList = fu.capabilityList; +} + + +void +FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat) +{ + if (issuelat == 0 || oplat == 0) + panic("FuncUnit: you don't really want a zero-cycle latency do you?"); + + capabilityList.set(cap); + + opLatencies[cap] = oplat; + issueLatencies[cap] = issuelat; +} + +bool +FuncUnit::provides(OpClass capability) +{ + return capabilityList[capability]; +} + +bitset<Num_OpClasses> +FuncUnit::capabilities() +{ + return capabilityList; +} + +unsigned & +FuncUnit::opLatency(OpClass cap) +{ + return opLatencies[cap]; +} + +unsigned +FuncUnit::issueLatency(OpClass capability) +{ + return issueLatencies[capability]; +} + +//////////////////////////////////////////////////////////////////////////// +// +// The SimObjects we use to get the FU information into the simulator +// +//////////////////////////////////////////////////////////////////////////// + +// +// We use 2 objects to specify this data in the INI file: +// (1) OpDesc - Describes the operation class & latencies +// (multiple OpDesc objects can refer to the same +// operation classes) +// (2) FUDesc - Describes the operations available in the unit & +// the number of these units +// +// + + +// +// The operation-class description object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OpDesc) + + SimpleEnumParam<OpClass> opClass; + Param<unsigned> opLat; + Param<unsigned> issueLat; + +END_DECLARE_SIM_OBJECT_PARAMS(OpDesc) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OpDesc) + + INIT_ENUM_PARAM(opClass, "type of operation", opClassStrings), + INIT_PARAM(opLat, "cycles until result is available"), + INIT_PARAM(issueLat, "cycles until another can be issued") + +END_INIT_SIM_OBJECT_PARAMS(OpDesc) + + +CREATE_SIM_OBJECT(OpDesc) +{ + return new OpDesc(getInstanceName(), opClass, opLat, issueLat); +} + +REGISTER_SIM_OBJECT("OpDesc", OpDesc) + + +// +// The FuDesc object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUDesc) + + SimObjectVectorParam<OpDesc *> opList; + Param<unsigned> count; + +END_DECLARE_SIM_OBJECT_PARAMS(FUDesc) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(FUDesc) + + INIT_PARAM(opList, "list of operation classes for this FU type"), + INIT_PARAM(count, "number of these FU's available") + +END_INIT_SIM_OBJECT_PARAMS(FUDesc) + + +CREATE_SIM_OBJECT(FUDesc) +{ + return new FUDesc(getInstanceName(), opList, count); +} + +REGISTER_SIM_OBJECT("FUDesc", FUDesc) + diff --git a/src/cpu/func_unit.hh b/src/cpu/func_unit.hh new file mode 100644 index 000000000..780143096 --- /dev/null +++ b/src/cpu/func_unit.hh @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2002-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Raasch + */ + +#ifndef __CPU_FUNC_UNIT_HH__ +#define __CPU_FUNC_UNIT_HH__ + +#include <bitset> +#include <string> +#include <vector> + +#include "cpu/op_class.hh" +#include "sim/sim_object.hh" + +//////////////////////////////////////////////////////////////////////////// +// +// Structures used ONLY during the initialization phase... +// +// +// + +struct OpDesc : public SimObject +{ + OpClass opClass; + unsigned opLat; + unsigned issueLat; + + OpDesc(std::string name, OpClass c, unsigned o, unsigned i) + : SimObject(name), opClass(c), opLat(o), issueLat(i) {}; +}; + +struct FUDesc : public SimObject +{ + std::vector<OpDesc *> opDescList; + unsigned number; + + FUDesc(std::string name, std::vector<OpDesc *> l, unsigned n) + : SimObject(name), opDescList(l), number(n) {}; +}; + +typedef std::vector<OpDesc *>::iterator OPDDiterator; +typedef std::vector<FUDesc *>::iterator FUDDiterator; + + + + +//////////////////////////////////////////////////////////////////////////// +// +// The actual FU object +// +// +// +class FuncUnit +{ + private: + unsigned opLatencies[Num_OpClasses]; + unsigned issueLatencies[Num_OpClasses]; + std::bitset<Num_OpClasses> capabilityList; + + public: + FuncUnit(); + FuncUnit(const FuncUnit &fu); + + std::string name; + + void addCapability(OpClass cap, unsigned oplat, unsigned issuelat); + + bool provides(OpClass capability); + std::bitset<Num_OpClasses> capabilities(); + + unsigned &opLatency(OpClass capability); + unsigned issueLatency(OpClass capability); +}; + +#endif // __FU_POOL_HH__ diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript new file mode 100755 index 000000000..e65d41411 --- /dev/null +++ b/src/cpu/o3/SConscript @@ -0,0 +1,79 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Korey Sewell + +import os +import os.path +import sys + +# Import build environment variable from SConstruct. +Import('env') + + +################################################################# +# +# Include ISA-specific files for the O3 CPU-model +# +################################################################# + +sources = [] + +if env['TARGET_ISA'] == 'alpha': + sources += Split(''' + alpha/dyn_inst.cc + alpha/cpu.cc + alpha/thread_context.cc + alpha/cpu_builder.cc + ''') +elif env['TARGET_ISA'] == 'mips': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # mips/dyn_inst.cc + # mips/cpu.cc + # mips/thread_context.cc + # mips/cpu_builder.cc + # ''') +elif env['TARGET_ISA'] == 'sparc': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # sparc/dyn_inst.cc + # sparc/cpu.cc + # sparc/thread_context.cc + # sparc/cpu_builder.cc + # ''') +else: + sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA']) + + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha/cpu.cc index 39cae696b..ed10b2fd1 100644 --- a/src/cpu/o3/alpha_cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,11 +28,11 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/cpu_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" -// Force instantiation of AlphaFullCPU for all the implemntations that are +// Force instantiation of AlphaO3CPU for all the implemntations that are // needed. Consider merging this and alpha_dyn_inst.cc, and maybe all // classes that depend on a certain impl, into one file (alpha_impl.cc?). -template class AlphaFullCPU<AlphaSimpleImpl>; +template class AlphaO3CPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh new file mode 100644 index 000000000..b961341d5 --- /dev/null +++ b/src/cpu/o3/alpha/cpu.hh @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_CPU_HH__ +#define __CPU_O3_ALPHA_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * AlphaO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template <class Impl> +class AlphaO3CPU : public FullO3CPU<Impl> +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState<Impl> ImplState; + typedef O3ThreadState<Impl> Thread; + typedef typename Impl::Params Params; + + /** Constructs an AlphaO3CPU with the given parameters. */ + AlphaO3CPU(Params *params); + +#if FULL_SYSTEM + /** ITB pointer. */ + AlphaITB *itb; + /** DTB pointer. */ + AlphaDTB *dtb; +#endif + + /** Registers statistics. */ + void regStats(); + +#if FULL_SYSTEM + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return itb->translate(req, thread->getTC()); + } + + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), false); + } + + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), true); + } + +#else + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + +#endif + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + +#if FULL_SYSTEM + /** Posts an interrupt. */ + void post_interrupt(int int_num, int index); + /** Reads the interrupt flag. */ + int readIntrFlag(); + /** Sets the interrupt flags. */ + void setIntrFlag(int val); + /** HW return from error interrupt. */ + Fault hwrei(unsigned tid); + /** Returns if a specific PC is a PAL mode PC. */ + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + bool simPalCheck(int palFunc, unsigned tid); + + /** Processes any interrupts. */ + void processInterrupts(); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } +#endif + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + +#if !FULL_SYSTEM + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + + /** CPU read function, forwards read to LSQ. */ + template <class T> + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template <class T> + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 828977ccb..5e767655d 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -31,21 +31,21 @@ #include <string> #include "cpu/base.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/fu_pool.hh" #include "sim/builder.hh" -class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl> +class DerivO3CPU : public AlphaO3CPU<AlphaSimpleImpl> { public: - DerivAlphaFullCPU(AlphaSimpleParams *p) - : AlphaFullCPU<AlphaSimpleImpl>(p) + DerivO3CPU(AlphaSimpleParams *p) + : AlphaO3CPU<AlphaSimpleImpl>(p) { } }; -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; Param<int> numThreads; @@ -91,12 +91,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -104,7 +102,9 @@ Param<unsigned> renameToROBDelay; Param<unsigned> commitWidth; Param<unsigned> squashWidth; Param<Tick> trapLatency; -Param<Tick> fetchTrapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; Param<std::string> predType; Param<unsigned> localPredictorSize; @@ -149,9 +149,9 @@ Param<bool> defer_registration; Param<bool> function_trace; Param<Tick> function_trace_start; -END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), @@ -212,12 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -226,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), - INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), @@ -271,11 +271,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) -CREATE_SIM_OBJECT(DerivAlphaFullCPU) +CREATE_SIM_OBJECT(DerivO3CPU) { - DerivAlphaFullCPU *cpu; + DerivO3CPU *cpu; #if FULL_SYSTEM // Full-system only supports a single thread for the moment. @@ -284,12 +284,12 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) // In non-full-system mode, we infer the number of threads from // the workload if it's not explicitly specified. int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); if (workload.size() == 0) { fatal("Must specify at least one workload!"); } - #endif AlphaSimpleParams *params = new AlphaSimpleParams; @@ -343,12 +343,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -356,7 +354,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; params->trapLatency = trapLatency; - params->fetchTrapLatency = fetchTrapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; params->predType = predType; params->localPredictorSize = localPredictorSize; @@ -386,7 +386,16 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->numROBEntries = numROBEntries; params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; params->smtLSQPolicy = smtLSQPolicy; params->smtLSQThreshold = smtLSQThreshold; @@ -401,10 +410,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; - cpu = new DerivAlphaFullCPU(params); + cpu = new DerivO3CPU(params); return cpu; } -REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh new file mode 100644 index 000000000..0473e60c2 --- /dev/null +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "config/use_checker.hh" + +#include "arch/alpha/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/params.hh" +#include "cpu/o3/alpha/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +#if FULL_SYSTEM +#include "arch/alpha/osfpal.hh" +#include "arch/isa_traits.hh" +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" +#endif + +using namespace TheISA; + +template <class Impl> +AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) +#if FULL_SYSTEM + : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb) +#else + : FullO3CPU<Impl>(params) +#endif +{ + DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { +#if FULL_SYSTEM + // SMT is not supported in FS mode yet. + assert(this->numThreads == 1); + this->thread[i] = new Thread(this, 0); + this->thread[i]->setStatus(ThreadContext::Suspended); +#else + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + +#if !FULL_SYSTEM + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); +#endif + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } +#endif // !FULL_SYSTEM + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + AlphaTC<Impl> *alpha_tc = + new AlphaTC<Impl>; + + tc = alpha_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext<AlphaTC<Impl> >( + alpha_tc, this->checker); + } +#endif + + alpha_tc->cpu = this; + alpha_tc->thread = this->thread[i]; + +#if FULL_SYSTEM + // Setup quiesce event. + this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc); + + Port *mem_port; + FunctionalPort *phys_port; + VirtualPort *virt_port; + phys_port = new FunctionalPort(csprintf("%s-%d-funcport", + name(), i)); + mem_port = this->system->physmem->getPort("functional"); + mem_port->setPeer(phys_port); + phys_port->setPeer(mem_port); + + virt_port = new VirtualPort(csprintf("%s-%d-vport", + name(), i)); + mem_port = this->system->physmem->getPort("functional"); + mem_port->setPeer(virt_port); + virt_port->setPeer(mem_port); + + this->thread[i]->setPhysPort(phys_port); + this->thread[i]->setVirtPort(virt_port); +#endif + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template <class Impl> +void +AlphaO3CPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template <class Impl> +MiscReg +AlphaO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template <class Impl> +MiscReg +AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +#if FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::post_interrupt(int int_num, int index) +{ + BaseCPU::post_interrupt(int_num, index); + + if (this->thread[0]->status() == ThreadContext::Suspended) { + DPRINTF(IPI,"Suspended Processor awoke\n"); + this->threadContexts[0]->activate(); + } +} + +template <class Impl> +int +AlphaO3CPU<Impl>::readIntrFlag() +{ + return this->regFile.readIntrFlag(); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setIntrFlag(int val) +{ + this->regFile.setIntrFlag(val); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::hwrei(unsigned tid) +{ + // Need to clear the lock flag upon returning from an interrupt. + this->lockFlag = false; + + this->thread[tid]->kernelStats->hwrei(); + + this->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template <class Impl> +bool +AlphaO3CPU<Impl>::simPalCheck(int palFunc, unsigned tid) +{ + if (this->thread[tid]->kernelStats) + this->thread[tid]->kernelStats->callpal(palFunc, + this->threadContexts[tid]); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + exitSimLoop("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } + + return true; +} + +template <class Impl> +void +AlphaO3CPU<Impl>::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that + // exists within isa_fullsys_traits.hh. Also assume that thread 0 + // is the one that handles the interrupts. + // @todo: Possibly consolidate the interrupt checking code. + // @todo: Allow other threads to handle interrupts. + + // Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + this->checkInterrupts = false; + + if (this->readMiscReg(IPR_ASTRR, 0)) + panic("asynchronous traps not implemented\n"); + + if (this->readMiscReg(IPR_SIRR, 0)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = this->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { + this->setMiscReg(IPR_ISR, summary, 0); + this->setMiscReg(IPR_INTID, ipl, 0); + // Checker needs to know these two registers were updated. +#if USE_CHECKER + if (this->checker) { + this->checker->threadBase()->setMiscReg(IPR_ISR, summary); + this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); + } +#endif + this->trap(Fault(new InterruptFault), 0); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + this->readMiscReg(IPR_IPLR, 0), ipl, summary); + } +} + +#endif // FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); +} + +template <class Impl> +TheISA::IntReg +AlphaO3CPU<Impl>::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + this->setArchIntReg(SyscallSuccessReg, 0, tid); + this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha/dyn_inst.cc index 0c1723eec..97d2f3d08 100644 --- a/src/cpu/o3/alpha_dyn_inst.cc +++ b/src/cpu/o3/alpha/dyn_inst.cc @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst_impl.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst_impl.hh" +#include "cpu/o3/alpha/impl.hh" // Force instantiation of AlphaDynInst for all the implementations that // are needed. diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh index 36a08c4a7..9dee610b6 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -34,8 +34,8 @@ #include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" class Packet; @@ -51,7 +51,7 @@ class AlphaDynInst : public BaseDynInst<Impl> { public: /** Typedef for the CPU. */ - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** Binary machine instruction type. */ typedef TheISA::MachInst MachInst; @@ -74,7 +74,7 @@ class AlphaDynInst : public BaseDynInst<Impl> public: /** BaseDynInst constructor given a binary instruction. */ AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ AlphaDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh index a73cf4a7d..2d1b4b309 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -28,11 +28,11 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/dyn_inst.hh" template <class Impl> AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, O3CPU *cpu) : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) { initVars(); @@ -102,15 +102,7 @@ template <class Impl> Fault AlphaDynInst<Impl>::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha/impl.hh index 52f7c2394..b928ae654 100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -33,20 +33,21 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template <class Impl> class AlphaDynInst; template <class Impl> -class AlphaFullCPU; +class AlphaO3CPU; /** Implementation specific struct that defines several key types to the * CPU, the stages within the CPU, the time buffers, and the DynInst. * The struct defines the ISA, the CPU policy, the specific DynInst, the - * specific FullCPU, and all of the structs from the time buffers to do + * specific O3CPU, and all of the structs from the time buffers to do * communication. * This is one of the key things that must be defined for each hardware * specific CPU implementation. @@ -67,8 +68,14 @@ struct AlphaSimpleImpl */ typedef RefCountingPtr<DynInst> DynInstPtr; - /** The FullCPU type to be used. */ - typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU; + /** The O3CPU type to be used. */ + typedef AlphaO3CPU<AlphaSimpleImpl> O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; /** The Params to be passed to each stage. */ typedef AlphaSimpleParams Params; @@ -79,4 +86,7 @@ struct AlphaSimpleImpl }; }; +/** The O3Impl to be used. */ +typedef AlphaSimpleImpl O3CPUImpl; + #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh new file mode 100644 index 000000000..c618cee08 --- /dev/null +++ b/src/cpu/o3/alpha/params.hh @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_PARAMS_HH__ +#define __CPU_O3_ALPHA_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the AlphaO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams : public O3Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif +}; + +#endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.cc b/src/cpu/o3/alpha/thread_context.cc new file mode 100755 index 000000000..4a02715bc --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext<AlphaSimpleImpl>; + diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh new file mode 100644 index 000000000..ad52b0d2e --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template <class Impl> +class AlphaTC : public O3ThreadContext<Impl> +{ + public: +#if FULL_SYSTEM + /** Returns a pointer to the ITB. */ + virtual AlphaITB *getITBPtr() { return this->cpu->itb; } + + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return this->cpu->dtb; } + + /** Returns pointer to the quiesce event. */ + virtual EndQuiesceEvent *getQuiesceEvent() + { + return this->thread->quiesceEvent; + } + + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ + virtual bool inPalMode() + { return TheISA::PcPAL(this->cpu->readPC(this->thread->readTid())); } +#endif + + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { + panic("Alpha has no NextNPC!"); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } + + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh deleted file mode 100644 index f81837f3c..000000000 --- a/src/cpu/o3/alpha_cpu.hh +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__ -#define __CPU_O3_ALPHA_FULL_CPU_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/thread_context.hh" -#include "cpu/o3/cpu.hh" -#include "sim/byteswap.hh" - -class EndQuiesceEvent; -namespace Kernel { - class Statistics; -}; - -class TranslatingPort; - -/** - * AlphaFullCPU class. Derives from the FullO3CPU class, and - * implements all ISA and implementation specific functions of the - * CPU. This is the CPU class that is used for the SimObjects, and is - * what is given to the DynInsts. Most of its state exists in the - * FullO3CPU; the state is has is mainly for ISA specific - * functionality. - */ -template <class Impl> -class AlphaFullCPU : public FullO3CPU<Impl> -{ - protected: - typedef TheISA::IntReg IntReg; - typedef TheISA::FloatReg FloatReg; - typedef TheISA::FloatRegBits FloatRegBits; - typedef TheISA::MiscReg MiscReg; - typedef TheISA::RegFile RegFile; - typedef TheISA::MiscRegFile MiscRegFile; - - public: - typedef O3ThreadState<Impl> ImplState; - typedef O3ThreadState<Impl> Thread; - typedef typename Impl::Params Params; - - /** Constructs an AlphaFullCPU with the given parameters. */ - AlphaFullCPU(Params *params); - - /** - * Derived ThreadContext class for use with the AlphaFullCPU. It - * provides the interface for any external objects to access a - * single thread's state and some general CPU state. Any time - * external objects try to update state through this interface, - * the CPU will create an event to squash all in-flight - * instructions in order to ensure state is maintained correctly. - * It must be defined specifically for the AlphaFullCPU because - * not all architectural state is located within the O3ThreadState - * (such as the commit PC, and registers), and specific actions - * must be taken when using this interface (such as squashing all - * in-flight instructions when doing a write to this interface). - */ - class AlphaTC : public ThreadContext - { - public: - /** Pointer to the CPU. */ - AlphaFullCPU<Impl> *cpu; - - /** Pointer to the thread state that this TC corrseponds to. */ - O3ThreadState<Impl> *thread; - - /** Returns a pointer to this CPU. */ - virtual BaseCPU *getCpuPtr() { return cpu; } - - /** Sets this CPU's ID. */ - virtual void setCpuId(int id) { cpu->cpu_id = id; } - - /** Reads this CPU's ID. */ - virtual int readCpuId() { return cpu->cpu_id; } - -#if FULL_SYSTEM - /** Returns a pointer to the system. */ - virtual System *getSystemPtr() { return cpu->system; } - - /** Returns a pointer to physical memory. */ - virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } - - /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } - - /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } - - /** Returns a pointer to this thread's kernel statistics. */ - virtual Kernel::Statistics *getKernelStats() - { return thread->kernelStats; } - - virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } - - virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); - - void delVirtPort(VirtualPort *vp); -#else - virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } - - /** Returns a pointer to this thread's process. */ - virtual Process *getProcessPtr() { return thread->getProcessPtr(); } -#endif - /** Returns this thread's status. */ - virtual Status status() const { return thread->status(); } - - /** Sets this thread's status. */ - virtual void setStatus(Status new_status) - { thread->setStatus(new_status); } - - /** Set the status to Active. Optional delay indicates number of - * cycles to wait before beginning execution. */ - virtual void activate(int delay = 1); - - /** Set the status to Suspended. */ - virtual void suspend(); - - /** Set the status to Unallocated. */ - virtual void deallocate(); - - /** Set the status to Halted. */ - virtual void halt(); - -#if FULL_SYSTEM - /** Dumps the function profiling information. - * @todo: Implement. - */ - virtual void dumpFuncProfile(); -#endif - /** Takes over execution of a thread from another CPU. */ - virtual void takeOverFrom(ThreadContext *old_context); - - /** Registers statistics associated with this TC. */ - virtual void regStats(const std::string &name); - - /** Serializes state. */ - virtual void serialize(std::ostream &os); - /** Unserializes state. */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); - -#if FULL_SYSTEM - /** Returns pointer to the quiesce event. */ - virtual EndQuiesceEvent *getQuiesceEvent(); - - /** Reads the last tick that this thread was activated on. */ - virtual Tick readLastActivate(); - /** Reads the last tick that this thread was suspended on. */ - virtual Tick readLastSuspend(); - - /** Clears the function profiling information. */ - virtual void profileClear(); - /** Samples the function profiling information. */ - virtual void profileSample(); -#endif - /** Returns this thread's ID number. */ - virtual int getThreadNum() { return thread->readTid(); } - - /** Returns the instruction this thread is currently committing. - * Only used when an instruction faults. - */ - virtual TheISA::MachInst getInst(); - - /** Copies the architectural registers from another TC into this TC. */ - virtual void copyArchRegs(ThreadContext *tc); - - /** Resets all architectural registers to 0. */ - virtual void clearArchRegs(); - - /** Reads an integer register. */ - virtual uint64_t readIntReg(int reg_idx); - - virtual FloatReg readFloatReg(int reg_idx, int width); - - virtual FloatReg readFloatReg(int reg_idx); - - virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - - virtual FloatRegBits readFloatRegBits(int reg_idx); - - /** Sets an integer register to a value. */ - virtual void setIntReg(int reg_idx, uint64_t val); - - virtual void setFloatReg(int reg_idx, FloatReg val, int width); - - virtual void setFloatReg(int reg_idx, FloatReg val); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val); - - /** Reads this thread's PC. */ - virtual uint64_t readPC() - { return cpu->readPC(thread->readTid()); } - - /** Sets this thread's PC. */ - virtual void setPC(uint64_t val); - - /** Reads this thread's next PC. */ - virtual uint64_t readNextPC() - { return cpu->readNextPC(thread->readTid()); } - - /** Sets this thread's next PC. */ - virtual void setNextPC(uint64_t val); - - virtual uint64_t readNextNPC() - { - panic("Alpha has no NextNPC!"); - return 0; - } - - virtual void setNextNPC(uint64_t val) - { } - - /** Reads a miscellaneous register. */ - virtual MiscReg readMiscReg(int misc_reg) - { return cpu->readMiscReg(misc_reg, thread->readTid()); } - - /** Reads a misc. register, including any side-effects the - * read might have as defined by the architecture. */ - virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } - - /** Sets a misc. register. */ - virtual Fault setMiscReg(int misc_reg, const MiscReg &val); - - /** Sets a misc. register, including any side-effects the - * write might have as defined by the architecture. */ - virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - - /** Returns the number of consecutive store conditional failures. */ - // @todo: Figure out where these store cond failures should go. - virtual unsigned readStCondFailures() - { return thread->storeCondFailures; } - - /** Sets the number of consecutive store conditional failures. */ - virtual void setStCondFailures(unsigned sc_failures) - { thread->storeCondFailures = sc_failures; } - -#if FULL_SYSTEM - /** Returns if the thread is currently in PAL mode, based on - * the PC's value. */ - virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } -#endif - // Only really makes sense for old CPU model. Lots of code - // outside the CPU still checks this function, so it will - // always return false to keep everything working. - /** Checks if the thread is misspeculating. Because it is - * very difficult to determine if the thread is - * misspeculating, this is set as false. */ - virtual bool misspeculating() { return false; } - -#if !FULL_SYSTEM - /** Gets a syscall argument by index. */ - virtual IntReg getSyscallArg(int i); - - /** Sets a syscall argument. */ - virtual void setSyscallArg(int i, IntReg val); - - /** Sets the syscall return value. */ - virtual void setSyscallReturn(SyscallReturn return_value); - - /** Executes a syscall in SE mode. */ - virtual void syscall(int64_t callnum) - { return cpu->syscall(callnum, thread->readTid()); } - - /** Reads the funcExeInst counter. */ - virtual Counter readFuncExeInst() { return thread->funcExeInst; } -#endif - virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, - TheISA::RegFile::ContextVal val) - { panic("Not supported on Alpha!"); } - }; - -#if FULL_SYSTEM - /** ITB pointer. */ - AlphaITB *itb; - /** DTB pointer. */ - AlphaDTB *dtb; -#endif - - /** Registers statistics. */ - void regStats(); - -#if FULL_SYSTEM - /** Translates instruction requestion. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return itb->translate(req, thread->getTC()); - } - - /** Translates data read request. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), false); - } - - /** Translates data write request. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), true); - } - -#else - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - -#endif - /** Reads a miscellaneous register. */ - MiscReg readMiscReg(int misc_reg, unsigned tid); - - /** Reads a misc. register, including any side effects the read - * might have as defined by the architecture. - */ - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); - - /** Sets a miscellaneous register. */ - Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); - - /** Sets a misc. register, including any side effects the write - * might have as defined by the architecture. - */ - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); - - /** Initiates a squash of all in-flight instructions for a given - * thread. The source of the squash is an external update of - * state through the TC. - */ - void squashFromTC(unsigned tid); - -#if FULL_SYSTEM - /** Posts an interrupt. */ - void post_interrupt(int int_num, int index); - /** Reads the interrupt flag. */ - int readIntrFlag(); - /** Sets the interrupt flags. */ - void setIntrFlag(int val); - /** HW return from error interrupt. */ - Fault hwrei(unsigned tid); - /** Returns if a specific PC is a PAL mode PC. */ - bool inPalMode(uint64_t PC) - { return AlphaISA::PcPAL(PC); } - - /** Traps to handle given fault. */ - void trap(Fault fault, unsigned tid); - bool simPalCheck(int palFunc, unsigned tid); - - /** Processes any interrupts. */ - void processInterrupts(); - - /** Halts the CPU. */ - void halt() { panic("Halt not implemented!\n"); } -#endif - - -#if !FULL_SYSTEM - /** Executes a syscall. - * @todo: Determine if this needs to be virtual. - */ - void syscall(int64_t callnum, int tid); - /** Gets a syscall argument. */ - IntReg getSyscallArg(int i, int tid); - - /** Used to shift args for indirect syscall. */ - void setSyscallArg(int i, IntReg val, int tid); - - /** Sets the return value of a syscall. */ - void setSyscallReturn(SyscallReturn return_value, int tid); -#endif - - /** CPU read function, forwards read to LSQ. */ - template <class T> - Fault read(RequestPtr &req, T &data, int load_idx) - { - return this->iew.ldstQueue.read(req, data, load_idx); - } - - /** CPU write function, forwards write to LSQ. */ - template <class T> - Fault write(RequestPtr &req, T &data, int store_idx) - { - return this->iew.ldstQueue.write(req, data, store_idx); - } - - Addr lockAddr; - - /** Temporary fix for the lock flag, works in the UP case. */ - bool lockFlag; -}; - -#endif // __CPU_O3_ALPHA_FULL_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh deleted file mode 100644 index 98290e57f..000000000 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ /dev/null @@ -1,872 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#include "arch/alpha/faults.hh" -#include "base/cprintf.hh" -#include "base/statistics.hh" -#include "base/timebuf.hh" -#include "cpu/checker/thread_context.hh" -#include "sim/sim_events.hh" -#include "sim/stats.hh" - -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_params.hh" -#include "cpu/o3/comm.hh" -#include "cpu/o3/thread_state.hh" - -#if FULL_SYSTEM -#include "arch/alpha/osfpal.hh" -#include "arch/isa_traits.hh" -#include "cpu/quiesce_event.hh" -#include "kern/kernel_stats.hh" -#include "sim/system.hh" -#endif - -using namespace TheISA; - -template <class Impl> -AlphaFullCPU<Impl>::AlphaFullCPU(Params *params) -#if FULL_SYSTEM - : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb) -#else - : FullO3CPU<Impl>(params) -#endif -{ - DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); - - // Setup any thread state. - this->thread.resize(this->numThreads); - - for (int i = 0; i < this->numThreads; ++i) { -#if FULL_SYSTEM - // SMT is not supported in FS mode yet. - assert(this->numThreads == 1); - this->thread[i] = new Thread(this, 0); - this->thread[i]->setStatus(ThreadContext::Suspended); -#else - if (i < params->workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", - i, this->thread[i]); - this->thread[i] = new Thread(this, i, params->workload[i], - i, params->mem); - - this->thread[i]->setStatus(ThreadContext::Suspended); - -#if !FULL_SYSTEM - /* Use this port to for syscall emulation writes to memory. */ - Port *mem_port; - TranslatingPort *trans_port; - trans_port = new TranslatingPort(csprintf("%s-%d-funcport", - name(), i), - params->workload[i]->pTable, - false); - mem_port = params->mem->getPort("functional"); - mem_port->setPeer(trans_port); - trans_port->setPeer(mem_port); - this->thread[i]->setMemPort(trans_port); -#endif - //usedTids[i] = true; - //threadMap[i] = i; - } else { - //Allocate Empty thread so M5 can use later - //when scheduling threads to CPU - Process* dummy_proc = NULL; - - this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); - //usedTids[i] = false; - } -#endif // !FULL_SYSTEM - - ThreadContext *tc; - - // Setup the TC that will serve as the interface to the threads/CPU. - AlphaTC *alpha_tc = new AlphaTC; - - // If we're using a checker, then the TC should be the - // CheckerThreadContext. - if (params->checker) { - tc = new CheckerThreadContext<AlphaTC>( - alpha_tc, this->checker); - } else { - tc = alpha_tc; - } - - alpha_tc->cpu = this; - alpha_tc->thread = this->thread[i]; - -#if FULL_SYSTEM - // Setup quiesce event. - this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc); - - Port *mem_port; - FunctionalPort *phys_port; - VirtualPort *virt_port; - phys_port = new FunctionalPort(csprintf("%s-%d-funcport", - name(), i)); - mem_port = this->system->physmem->getPort("functional"); - mem_port->setPeer(phys_port); - phys_port->setPeer(mem_port); - - virt_port = new VirtualPort(csprintf("%s-%d-vport", - name(), i)); - mem_port = this->system->physmem->getPort("functional"); - mem_port->setPeer(virt_port); - virt_port->setPeer(mem_port); - - this->thread[i]->setPhysPort(phys_port); - this->thread[i]->setVirtPort(virt_port); -#endif - // Give the thread the TC. - this->thread[i]->tc = tc; - - // Add the TC to the CPU's list of TC's. - this->threadContexts.push_back(tc); - } - - for (int i=0; i < this->numThreads; i++) { - this->thread[i]->setFuncExeInst(0); - } - - // Sets CPU pointers. These must be set at this level because the CPU - // pointers are defined to be the highest level of CPU class. - this->fetch.setCPU(this); - this->decode.setCPU(this); - this->rename.setCPU(this); - this->iew.setCPU(this); - this->commit.setCPU(this); - - this->rob.setCPU(this); - this->regFile.setCPU(this); - - lockAddr = 0; - lockFlag = false; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::regStats() -{ - // Register stats for everything that has stats. - this->fullCPURegStats(); - this->fetch.regStats(); - this->decode.regStats(); - this->rename.regStats(); - this->iew.regStats(); - this->commit.regStats(); -} - -#if FULL_SYSTEM -template <class Impl> -VirtualPort * -AlphaFullCPU<Impl>::AlphaTC::getVirtPort(ThreadContext *src_tc) -{ - if (!src_tc) - return thread->getVirtPort(); - - VirtualPort *vp; - Port *mem_port; - - vp = new VirtualPort("tc-vport", src_tc); - mem_port = cpu->system->physmem->getPort("functional"); - mem_port->setPeer(vp); - vp->setPeer(mem_port); - return vp; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::dumpFuncProfile() -{ - // Currently not supported -} -#endif - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::takeOverFrom(ThreadContext *old_context) -{ - // some things should already be set up -#if FULL_SYSTEM - assert(getSystemPtr() == old_context->getSystemPtr()); -#else - assert(getProcessPtr() == old_context->getProcessPtr()); -#endif - - // copy over functional state - setStatus(old_context->status()); - copyArchRegs(old_context); - setCpuId(old_context->readCpuId()); - -#if !FULL_SYSTEM - thread->funcExeInst = old_context->readFuncExeInst(); -#else - EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); - if (other_quiesce) { - // Point the quiesce event's TC at this TC so that it wakes up - // the proper CPU. - other_quiesce->tc = this; - } - if (thread->quiesceEvent) { - thread->quiesceEvent->tc = this; - } - - // Transfer kernel stats from one CPU to the other. - thread->kernelStats = old_context->getKernelStats(); -// storeCondFailures = 0; - cpu->lockFlag = false; -#endif - - old_context->setStatus(ThreadContext::Unallocated); - - thread->inSyscall = false; - thread->trapPending = false; -} - -#if FULL_SYSTEM -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::delVirtPort(VirtualPort *vp) -{ - delete vp->getPeer(); - delete vp; -} -#endif - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::activate(int delay) -{ - DPRINTF(FullCPU, "Calling activate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Active) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; -#endif - - if (thread->status() == ThreadContext::Unallocated) { - cpu->activateWhenReady(thread->readTid()); - return; - } - - thread->setStatus(ThreadContext::Active); - - // status() == Suspended - cpu->activateContext(thread->readTid(), delay); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::suspend() -{ - DPRINTF(FullCPU, "Calling suspend on AlphaTC\n"); - - if (thread->status() == ThreadContext::Suspended) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; - thread->lastSuspend = curTick; -#endif -/* -#if FULL_SYSTEM - // Don't change the status from active if there are pending interrupts - if (cpu->check_interrupts()) { - assert(status() == ThreadContext::Active); - return; - } -#endif -*/ - thread->setStatus(ThreadContext::Suspended); - cpu->suspendContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::deallocate() -{ - DPRINTF(FullCPU, "Calling deallocate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Unallocated) - return; - - thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::halt() -{ - DPRINTF(FullCPU, "Calling halt on AlphaTC\n"); - - if (thread->status() == ThreadContext::Halted) - return; - - thread->setStatus(ThreadContext::Halted); - cpu->haltContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::regStats(const std::string &name) -{ -#if FULL_SYSTEM - thread->kernelStats = new Kernel::Statistics(cpu->system); - thread->kernelStats->regStats(name + ".kern"); -#endif -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::serialize(std::ostream &os) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->serialize(os); -#endif - -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::unserialize(Checkpoint *cp, const std::string §ion) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->unserialize(cp, section); -#endif - -} - -#if FULL_SYSTEM -template <class Impl> -EndQuiesceEvent * -AlphaFullCPU<Impl>::AlphaTC::getQuiesceEvent() -{ - return thread->quiesceEvent; -} - -template <class Impl> -Tick -AlphaFullCPU<Impl>::AlphaTC::readLastActivate() -{ - return thread->lastActivate; -} - -template <class Impl> -Tick -AlphaFullCPU<Impl>::AlphaTC::readLastSuspend() -{ - return thread->lastSuspend; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::profileClear() -{} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::profileSample() -{} -#endif - -template <class Impl> -TheISA::MachInst -AlphaFullCPU<Impl>::AlphaTC:: getInst() -{ - return thread->getInst(); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::copyArchRegs(ThreadContext *tc) -{ - // This function will mess things up unless the ROB is empty and - // there are no instructions in the pipeline. - unsigned tid = thread->readTid(); - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i); - - DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, cpu->readIntReg(renamed_reg), - tc->readIntReg(i)); - - cpu->setIntReg(renamed_reg, tc->readIntReg(i)); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); - cpu->setFloatRegBits(renamed_reg, - tc->readFloatRegBits(i)); - } - - // Copy the misc regs. - copyMiscRegs(tc, this); - - // Then finally set the PC and the next PC. - cpu->setPC(tc->readPC(), tid); - cpu->setNextPC(tc->readNextPC(), tid); -#if !FULL_SYSTEM - this->thread->funcExeInst = tc->readFuncExeInst(); -#endif -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::clearArchRegs() -{} - -template <class Impl> -uint64_t -AlphaFullCPU<Impl>::AlphaTC::readIntReg(int reg_idx) -{ - return cpu->readArchIntReg(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatReg -AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx, int width) -{ - switch(width) { - case 32: - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); - case 64: - return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); - default: - panic("Unsupported width!"); - return 0; - } -} - -template <class Impl> -FloatReg -AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx) -{ - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatRegBits -AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx, int width) -{ - DPRINTF(Fault, "Reading floatint register through the TC!\n"); - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatRegBits -AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx) -{ - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setIntReg(int reg_idx, uint64_t val) -{ - cpu->setArchIntReg(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) -{ - switch(width) { - case 32: - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - break; - case 64: - cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); - break; - } - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val) -{ - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, - int width) -{ - DPRINTF(Fault, "Setting floatint register through the TC!\n"); - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) -{ - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setPC(uint64_t val) -{ - cpu->setPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setNextPC(uint64_t val) -{ - cpu->setNextPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::AlphaTC::setMiscRegWithEffect(int misc_reg, - const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, - thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -#if !FULL_SYSTEM - -template <class Impl> -TheISA::IntReg -AlphaFullCPU<Impl>::AlphaTC::getSyscallArg(int i) -{ - return cpu->getSyscallArg(i, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setSyscallArg(int i, IntReg val) -{ - cpu->setSyscallArg(i, val, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setSyscallReturn(SyscallReturn return_value) -{ - cpu->setSyscallReturn(return_value, thread->readTid()); -} - -#endif // FULL_SYSTEM - -template <class Impl> -MiscReg -AlphaFullCPU<Impl>::readMiscReg(int misc_reg, unsigned tid) -{ - return this->regFile.readMiscReg(misc_reg, tid); -} - -template <class Impl> -MiscReg -AlphaFullCPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, - unsigned tid) -{ - return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) -{ - return this->regFile.setMiscReg(misc_reg, val, tid); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, - unsigned tid) -{ - return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::squashFromTC(unsigned tid) -{ - this->thread[tid]->inSyscall = true; - this->commit.generateTCEvent(tid); -} - -#if FULL_SYSTEM - -template <class Impl> -void -AlphaFullCPU<Impl>::post_interrupt(int int_num, int index) -{ - BaseCPU::post_interrupt(int_num, index); - - if (this->thread[0]->status() == ThreadContext::Suspended) { - DPRINTF(IPI,"Suspended Processor awoke\n"); - this->threadContexts[0]->activate(); - } -} - -template <class Impl> -int -AlphaFullCPU<Impl>::readIntrFlag() -{ - return this->regFile.readIntrFlag(); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setIntrFlag(int val) -{ - this->regFile.setIntrFlag(val); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::hwrei(unsigned tid) -{ - // Need to clear the lock flag upon returning from an interrupt. - this->lockFlag = false; - - this->thread[tid]->kernelStats->hwrei(); - - this->checkInterrupts = true; - - // FIXME: XXX check for interrupts? XXX - return NoFault; -} - -template <class Impl> -bool -AlphaFullCPU<Impl>::simPalCheck(int palFunc, unsigned tid) -{ - if (this->thread[tid]->kernelStats) - this->thread[tid]->kernelStats->callpal(palFunc, - this->threadContexts[tid]); - - switch (palFunc) { - case PAL::halt: - halt(); - if (--System::numSystemsRunning == 0) - exitSimLoop("all cpus halted"); - break; - - case PAL::bpt: - case PAL::bugchk: - if (this->system->breakpoint()) - return false; - break; - } - - return true; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid) -{ - // Pass the thread's TC into the invoke method. - fault->invoke(this->threadContexts[tid]); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::processInterrupts() -{ - // Check for interrupts here. For now can copy the code that - // exists within isa_fullsys_traits.hh. Also assume that thread 0 - // is the one that handles the interrupts. - // @todo: Possibly consolidate the interrupt checking code. - // @todo: Allow other threads to handle interrupts. - - // Check if there are any outstanding interrupts - //Handle the interrupts - int ipl = 0; - int summary = 0; - - this->checkInterrupts = false; - - if (this->readMiscReg(IPR_ASTRR, 0)) - panic("asynchronous traps not implemented\n"); - - if (this->readMiscReg(IPR_SIRR, 0)) { - for (int i = INTLEVEL_SOFTWARE_MIN; - i < INTLEVEL_SOFTWARE_MAX; i++) { - if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; - summary |= (ULL(1) << i); - } - } - } - - uint64_t interrupts = this->intr_status(); - - if (interrupts) { - for (int i = INTLEVEL_EXTERNAL_MIN; - i < INTLEVEL_EXTERNAL_MAX; i++) { - if (interrupts & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = i; - summary |= (ULL(1) << i); - } - } - } - - if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { - this->setMiscReg(IPR_ISR, summary, 0); - this->setMiscReg(IPR_INTID, ipl, 0); - // Checker needs to know these two registers were updated. - if (this->checker) { - this->checker->threadBase()->setMiscReg(IPR_ISR, summary); - this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); - } - this->trap(Fault(new InterruptFault), 0); - DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", - this->readMiscReg(IPR_IPLR, 0), ipl, summary); - } -} - -#endif // FULL_SYSTEM - -#if !FULL_SYSTEM - -template <class Impl> -void -AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid) -{ - DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); - - DPRINTF(Activity,"Activity: syscall() called.\n"); - - // Temporarily increase this by one to account for the syscall - // instruction. - ++(this->thread[tid]->funcExeInst); - - // Execute the actual syscall. - this->thread[tid]->syscall(callnum); - - // Decrease funcExeInst by one as the normal commit will handle - // incrementing it. - --(this->thread[tid]->funcExeInst); -} - -template <class Impl> -TheISA::IntReg -AlphaFullCPU<Impl>::getSyscallArg(int i, int tid) -{ - return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setSyscallArg(int i, IntReg val, int tid) -{ - this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) -{ - // check for error condition. Alpha syscall convention is to - // indicate success/failure in reg a3 (r19) and put the - // return value itself in the standard return value reg (v0). - if (return_value.successful()) { - // no error - this->setArchIntReg(SyscallSuccessReg, 0, tid); - this->setArchIntReg(ReturnValueReg, return_value.value(), tid); - } else { - // got an error, return details - this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); - this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); - } -} -#endif diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc new file mode 100644 index 000000000..0979c5c8f --- /dev/null +++ b/src/cpu/o3/base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/o3/isa_specific.hh" + +// Explicit instantiation +template class BaseDynInst<O3CPUImpl>; + +template <> +int +BaseDynInst<O3CPUImpl>::instcount = 0; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index b33543bdc..08fd4e8ea 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -29,11 +29,6 @@ */ #include "cpu/o3/bpred_unit_impl.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" +#include "cpu/o3/isa_specific.hh" -template class BPredUnit<AlphaSimpleImpl>; -template class BPredUnit<OzoneImpl>; -//template class BPredUnit<SimpleImpl>; +template class BPredUnit<O3CPUImpl>; diff --git a/src/cpu/checker/o3_cpu_builder.cc b/src/cpu/o3/checker_builder.cc index 59a6c7158..782d963b0 100644 --- a/src/cpu/checker/o3_cpu_builder.cc +++ b/src/cpu/o3/checker_builder.cc @@ -30,16 +30,19 @@ #include <string> -#include "cpu/checker/cpu.hh" +#include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" class MemObject; +template +class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >; + /** * Specific non-templated derived class used for SimObject configuration. */ @@ -75,6 +78,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) Param<bool> defer_registration; Param<bool> exitOnError; + Param<bool> warnOnlyOnLoadError; Param<bool> function_trace; Param<Tick> function_trace_start; @@ -105,6 +109,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") @@ -121,6 +127,7 @@ CREATE_SIM_OBJECT(O3Checker) params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 770008a33..637d59f52 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" -template class DefaultCommit<AlphaSimpleImpl>; +template class DefaultCommit<O3CPUImpl>; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index b7404c488..956b6ec3e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_COMMIT_HH__ @@ -67,7 +68,7 @@ class DefaultCommit { public: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -145,7 +146,7 @@ class DefaultCommit void regStats(); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the list of threads. */ void setThreads(std::vector<Thread *> &threads); @@ -161,10 +162,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -186,11 +183,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + bool drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -280,12 +280,20 @@ class DefaultCommit /** Sets the PC of a specific thread. */ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } - /** Reads the PC of a specific thread. */ + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } +#if THE_ISA != ALPHA_ISA + /** Reads the next NPC of a specific thread. */ + uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + + /** Sets the next NPC of a specific thread. */ + void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } +#endif + private: /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; @@ -317,16 +325,12 @@ class DefaultCommit ROB *rob; private: - /** Pointer to FullCPU. */ - FullCPU *cpu; + /** Pointer to O3CPU. */ + O3CPU *cpu; /** Vector of all of the threads. */ std::vector<Thread *> thread; - Fault fetchFault; - - int fetchTrapWait; - /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -365,11 +369,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; @@ -379,8 +378,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. */ bool switchedOut; @@ -390,10 +389,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. */ @@ -402,6 +397,11 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** The next NPC of each thread. */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index ceb2918e0..904af1071 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -28,16 +28,22 @@ * Authors: Kevin Lim */ +#include "config/full_system.hh" +#include "config/use_checker.hh" + #include <algorithm> #include <string> #include "base/loader/symtab.hh" #include "base/timebuf.hh" -#include "cpu/checker/cpu.hh" #include "cpu/exetrace.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/thread_state.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; template <class Impl> @@ -72,13 +78,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -118,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) tcSquash[i] = false; PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template <class Impl> @@ -205,19 +206,6 @@ DefaultCommit<Impl>::regStats() .flags(total) ; - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") @@ -233,17 +221,16 @@ DefaultCommit<Impl>::regStats() template <class Impl> void -DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); cpu = cpu_ptr; // Commit must broadcast the number of free entries it has at the start of // the simulation, so it starts as active. - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template <class Impl> @@ -302,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template <class Impl> void -DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - -template <class Impl> -void DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) { iewStage = iew_stage; @@ -358,23 +338,38 @@ DefaultCommit<Impl>::initStage() } template <class Impl> -void -DefaultCommit<Impl>::switchOut() +bool +DefaultCommit<Impl>::drain() { - switchPending = true; + drainPending = true; + + // If it's already drained, return true. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + return true; + } + + return false; } template <class Impl> void -DefaultCommit<Impl>::doSwitchOut() +DefaultCommit<Impl>::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } template <class Impl> void +DefaultCommit<Impl>::resume() +{ + drainPending = false; +} + +template <class Impl> +void DefaultCommit<Impl>::takeOverFrom() { switchedOut = false; @@ -409,10 +404,10 @@ DefaultCommit<Impl>::updateStatus() if (_nextStatus == Inactive && _status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::CommitIdx); + cpu->deactivateStage(O3CPU::CommitIdx); } else if (_nextStatus == Active && _status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); } _status = _nextStatus; @@ -434,7 +429,7 @@ DefaultCommit<Impl>::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. @@ -539,8 +534,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template <class Impl> @@ -558,8 +551,6 @@ DefaultCommit<Impl>::squashFromTC(unsigned tid) cpu->activityThisCycle(); tcSquash[tid] = false; - - ++squashCounter; } template <class Impl> @@ -569,11 +560,15 @@ DefaultCommit<Impl>::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } + if ((*activeThreads).size() <= 0) + return; + list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -585,10 +580,12 @@ DefaultCommit<Impl>::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -694,29 +691,7 @@ DefaultCommit<Impl>::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -744,8 +719,6 @@ DefaultCommit<Impl>::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; @@ -947,7 +920,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->reachedCommit = true; + head_inst->setAtCommit(); if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || @@ -1012,18 +985,19 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->setCompleted(); } +#if USE_CHECKER // Use checker prior to updating anything due to traps or PC // based events. if (cpu->checker) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } +#endif // Check if the instruction caused a fault. If so, trap. Fault inst_fault = head_inst->getFault(); if (inst_fault != NoFault) { head_inst->setCompleted(); -#if FULL_SYSTEM DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1032,9 +1006,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } +#if USE_CHECKER if (cpu->checker && head_inst->isStore()) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } +#endif assert(!thread[tid]->inSyscall); @@ -1065,10 +1041,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) generateTrapEvent(tid); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - head_inst->PC); -#endif // FULL_SYSTEM } updateComInstStats(head_inst); @@ -1256,7 +1228,8 @@ DefaultCommit<Impl>::roundRobin() unsigned tid = *pri_iter; if (commitStatus[tid] == Running || - commitStatus[tid] == Idle) { + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { if (rob->isHeadReady(tid)) { priority_list.erase(pri_iter); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 788c6b164..b407f4fcc 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -26,9 +26,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" +#include "config/use_checker.hh" #if FULL_SYSTEM #include "sim/system.hh" @@ -37,26 +39,28 @@ #endif #include "cpu/activity.hh" -#include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/cpu.hh" #include "sim/root.hh" #include "sim/stat_control.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; using namespace TheISA; -BaseFullCPU::BaseFullCPU(Params *params) +BaseO3CPU::BaseO3CPU(Params *params) : BaseCPU(params), cpu_id(0) { } void -BaseFullCPU::regStats() +BaseO3CPU::regStats() { BaseCPU::regStats(); } @@ -82,8 +86,67 @@ FullO3CPU<Impl>::TickEvent::description() } template <class Impl> +FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::ActivateThreadEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::ActivateThreadEvent::process() +{ + cpu->activateThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::ActivateThreadEvent::description() +{ + return "FullO3CPU \"Activate Thread\" event"; +} + +template <class Impl> +FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); + cpu->removeThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::DeallocateContextEvent::description() +{ + return "FullO3CPU \"Deallocate Context\" event"; +} + +template <class Impl> FullO3CPU<Impl>::FullO3CPU(Params *params) - : BaseFullCPU(params), + : BaseO3CPU(params), tickEvent(this), removeInstsThisCycle(false), fetch(params), @@ -94,7 +157,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs), - freeList(params->numberOfThreads,//number of activeThreads + freeList(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs), @@ -102,21 +165,20 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) params->smtROBPolicy, params->smtROBThreshold, params->numberOfThreads), - scoreboard(params->numberOfThreads,//number of activeThreads + scoreboard(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs, TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters or derived - // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -125,21 +187,25 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { _status = Idle; + checker = NULL; + if (params->checker) { +#if USE_CHECKER BaseCPU *temp_checker = params->checker; checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); checker->setMemory(mem); #if FULL_SYSTEM checker->setSystem(params->system); #endif - } else { - checker = NULL; +#else + panic("Checker enabled but not compiled in!"); +#endif // USE_CHECKER } #if !FULL_SYSTEM @@ -177,13 +243,18 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); #if !FULL_SYSTEM int active_threads = params->workload.size(); + + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or " + "edit your workload size."); + } #else int active_threads = 1; #endif @@ -249,6 +320,8 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) lastRunningCycle = curTick; + lastActivatedCycle = -1; + contextSwitch = false; } @@ -261,9 +334,9 @@ template <class Impl> void FullO3CPU<Impl>::fullCPURegStats() { - BaseFullCPU::regStats(); + BaseO3CPU::regStats(); - // Register any of the FullCPU's stats here. + // Register any of the O3CPU's stats here. timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -316,10 +389,22 @@ FullO3CPU<Impl>::fullCPURegStats() } template <class Impl> +Port * +FullO3CPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return iew.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void FullO3CPU<Impl>::tick() { - DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); ++numCycles; @@ -355,7 +440,8 @@ FullO3CPU<Impl>::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut) { + if (_status == SwitchedOut || + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -416,16 +502,107 @@ FullO3CPU<Impl>::init() template <class Impl> void +FullO3CPU<Impl>::activateThread(unsigned tid) +{ + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deactivateThread(unsigned tid) +{ + //Remove From Active List, if Active + list<unsigned>::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int tid, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleDeallocateContextEvent(tid, delay); + } else { + deactivateThread(tid); + removeThread(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int tid) +{ + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + deactivateThread(tid); + if (activeThreads.size() == 0) + unscheduleTickEvent(); + _status = Idle; +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int tid) +{ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + deallocateContext(tid, 1); +} + +template <class Impl> +void FullO3CPU<Impl>::insertThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. -#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -445,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -458,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template <class Impl> void FullO3CPU<Impl>::removeThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -475,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -483,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... - */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); + iew.squash(tid); + commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -511,37 +687,37 @@ template <class Impl> void FullO3CPU<Impl>::activateWhenReady(int tid) { - DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming" + DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming" "(e.g. PhysRegs/ROB/IQ/LSQ) \n", tid); bool ready = true; if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Int. Regs.\n", tid); ready = false; } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Float. Regs.\n", tid); ready = false; } else if (commit.rob->numFreeEntries() >= commit.rob->entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "ROB entries.\n", tid); ready = false; } else if (iew.instQueue.numFreeEntries() >= iew.instQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "IQ entries.\n", tid); ready = false; } else if (iew.ldstQueue.numFreeEntries() >= iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "LSQ entries.\n", tid); ready = false; @@ -559,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid) //blocks fetch contextSwitch = true; + //@todo: dont always add to waitlist //do waitlist cpuWaitList.push_back(tid); } @@ -566,133 +743,130 @@ FullO3CPU<Impl>::activateWhenReady(int tid) template <class Impl> void -FullO3CPU<Impl>::activateContext(int tid, int delay) +FullO3CPU<Impl>::serialize(std::ostream &os) { - // Needs to set each stage to running as well. - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the - //delay needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", - tid); - - activeThreads.push_back(tid); + SERIALIZE_ENUM(_status); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); } - - assert(_status == Idle || _status == SwitchedOut); - - scheduleTickEvent(delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); - - _status = Running; } template <class Impl> void -FullO3CPU<Impl>::suspendContext(int tid) +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) { - DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid); - unscheduleTickEvent(); - _status = Idle; -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + UNSERIALIZE_ENUM(_status); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); + } +} - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); +template <class Impl> +unsigned int +FullO3CPU<Impl>::drain(Event *drain_event) +{ + drainCount = 0; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + commit.drain(); + + // Wake the CPU and record activity so everything can drain out if + // the CPU was not able to immediately drain. + if (getState() != SimObject::Drained) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + + wakeCPU(); + activityRec.activity(); + + return 1; + } else { + return 0; } -*/ } template <class Impl> void -FullO3CPU<Impl>::deallocateContext(int tid) +FullO3CPU<Impl>::resume() { - DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + assert(system->getMemoryMode() == System::Timing); + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Running); - removeThread(tid); - } -*/ + if (_status == SwitchedOut || _status == Idle) + return; + + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; } template <class Impl> void -FullO3CPU<Impl>::haltContext(int tid) +FullO3CPU<Impl>::signalDrained() { - DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + if (++drainCount == NumStages) { + if (tickEvent.scheduled()) + tickEvent.squash(); - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Drained); - removeThread(tid); + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } -*/ + assert(drainCount <= 5); } template <class Impl> void -FullO3CPU<Impl>::switchOut(Sampler *_sampler) +FullO3CPU<Impl>::switchOut() { - sampler = _sampler; - switchCount = 0; fetch.switchOut(); - decode.switchOut(); rename.switchOut(); - iew.switchOut(); commit.switchOut(); - - // Wake the CPU and record activity so everything can drain out if - // the CPU is currently idle. - wakeCPU(); - activityRec.activity(); -} - -template <class Impl> -void -FullO3CPU<Impl>::signalSwitched() -{ - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } - - if (checker) - checker->switchOut(sampler); - - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); } - assert(switchCount <= 5); + + _status = SwitchedOut; +#if USE_CHECKER + if (checker) + checker->switchOut(); +#endif } template <class Impl> @@ -700,7 +874,7 @@ void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -730,7 +904,7 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) if (isActive == activeThreads.end()) { //May Need to Re-code this if the delay variable is the delay //needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", tid); activeThreads.push_back(tid); @@ -922,6 +1096,22 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } +#if THE_ISA != ALPHA_ISA +template <class Impl> +uint64_t +FullO3CPU<Impl>::readNextNPC(unsigned tid) +{ + return commit.readNextNPC(tid); +} + +template <class Impl> +void +FullO3CPU<Impl>::setNextNNPC(uint64_t val,unsigned tid) +{ + commit.setNextNPC(val, tid); +} +#endif + template <class Impl> typename FullO3CPU<Impl>::ListIt FullO3CPU<Impl>::addInst(DynInstPtr &inst) @@ -958,7 +1148,7 @@ template <class Impl> void FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) { - DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x " + DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x " "[sn:%lli]\n", inst->threadNumber, inst->readPC(), inst->seqNum); @@ -972,7 +1162,7 @@ template <class Impl> void FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) { - DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction" + DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); ListIt end_it; @@ -982,12 +1172,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) if (instList.empty()) { return; } else if (rob.isEmpty(/*tid*/)) { - DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n"); + DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); end_it = instList.begin(); rob_empty = true; } else { end_it = (rob.readTailInst(tid))->getInstListIt(); - DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n"); + DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); } removeInstsThisCycle = true; @@ -1026,7 +1216,7 @@ FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, inst_iter--; - DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + DPRINTF(O3CPU, "Deleting instructions from instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1048,7 +1238,7 @@ inline void FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid) { if ((*instIt)->threadNumber == tid) { - DPRINTF(FullCPU, "FullCPU: Squashing instruction, " + DPRINTF(O3CPU, "Squashing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*instIt)->threadNumber, (*instIt)->seqNum, @@ -1069,7 +1259,7 @@ void FullO3CPU<Impl>::cleanUpRemovedInsts() { while (!removeList.empty()) { - DPRINTF(FullCPU, "FullCPU: Removing instruction, " + DPRINTF(O3CPU, "Removing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, @@ -1185,4 +1375,4 @@ FullO3CPU<Impl>::updateThreadPriority() } // Forward declaration of FullO3CPU. -template class FullO3CPU<AlphaSimpleImpl>; +template class FullO3CPU<O3CPUImpl>; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ff41a3306..83cb966e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_CPU_HH__ @@ -48,24 +49,33 @@ #include "cpu/o3/cpu_policy.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/thread_state.hh" +//#include "cpu/o3/thread_context.hh" #include "sim/process.hh" template <class> class Checker; class ThreadContext; +template <class> +class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; -class BaseFullCPU : public BaseCPU +class BaseO3CPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. public: typedef BaseCPU::Params Params; - BaseFullCPU(Params *params); + BaseO3CPU(Params *params); void regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ int readCpuId() { return cpu_id; } protected: @@ -78,7 +88,7 @@ class BaseFullCPU : public BaseCPU * tick() function for the CPU is defined here. */ template <class Impl> -class FullO3CPU : public BaseFullCPU +class FullO3CPU : public BaseO3CPU { public: typedef TheISA::FloatReg FloatReg; @@ -93,6 +103,8 @@ class FullO3CPU : public BaseFullCPU typedef typename std::list<DynInstPtr>::iterator ListIt; + friend class O3ThreadContext<Impl>; + public: enum Status { Running, @@ -105,6 +117,9 @@ class FullO3CPU : public BaseFullCPU /** Overall CPU status. */ Status _status; + /** Per-thread status in CPU, used for SMT. */ + Status _threadStatus[Impl::MaxThreads]; + private: class TickEvent : public Event { @@ -141,6 +156,92 @@ class FullO3CPU : public BaseFullCPU tickEvent.squash(); } + class ActivateThreadEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + ActivateThreadEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule thread to activate , regardless of its current state. */ + void scheduleActivateThreadEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (activateThreadEvent[tid].squashed()) + activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + else if (!activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule actiavte thread event, regardless of its current state. */ + void unscheduleActivateThreadEvent(int tid) + { + if (activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + + class DeallocateContextEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + DeallocateContextEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule cpu to deallocate thread context.*/ + void scheduleDeallocateContextEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (deallocateContextEvent[tid].squashed()) + deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + else if (!deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule thread deallocation in CPU */ + void unscheduleDeallocateContextEvent(int tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -150,6 +251,9 @@ class FullO3CPU : public BaseFullCPU /** Registers statistics. */ void fullCPURegStats(); + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + /** Ticks CPU, calling tick() on each stage, and checking the overall * activity to see if the CPU should deschedule itself. */ @@ -158,6 +262,16 @@ class FullO3CPU : public BaseFullCPU /** Initialize the CPU */ void init(); + /** Returns the Number of Active Threads in the CPU */ + int numActiveThreads() + { return activeThreads.size(); } + + /** Add Thread to Active Threads List */ + void activateThread(unsigned tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(unsigned tid); + /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -184,7 +298,7 @@ class FullO3CPU : public BaseFullCPU /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. */ - void deallocateContext(int tid); + void deallocateContext(int tid, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -200,6 +314,13 @@ class FullO3CPU : public BaseFullCPU /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + public: /** Executes a syscall on this cycle. * --------------------------------------- * Note: this is a virtual function. CPU-Specific @@ -207,14 +328,21 @@ class FullO3CPU : public BaseFullCPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual unsigned int drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -299,6 +427,12 @@ class FullO3CPU : public BaseFullCPU /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid); + /** Reads the next NPC of a specific thread. */ + uint64_t readNextNPC(unsigned tid); + + /** Sets the next NPC of a specific thread. */ + void setNextNPC(uint64_t val, unsigned tid); + /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ @@ -481,11 +615,11 @@ class FullO3CPU : public BaseFullCPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; @@ -507,6 +641,9 @@ class FullO3CPU : public BaseFullCPU /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; + /** The cycle that the CPU was last activated by a new thread*/ + Tick lastActivatedCycle; + /** Number of Threads CPU can process */ unsigned numThreads; diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 4924f018a..896e38331 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" -template class DefaultDecode<AlphaSimpleImpl>; +template class DefaultDecode<O3CPUImpl>; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index ff88358d6..7f5ecbc26 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -48,7 +48,7 @@ class DefaultDecode { private: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -95,7 +95,7 @@ class DefaultDecode void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Drains the decode stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -189,7 +195,7 @@ class DefaultDecode private: // Interfaces to objects outside of decode. /** CPU interface. */ - FullCPU *cpu; + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8a6ea6626..8b851c032 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -112,7 +112,7 @@ DefaultDecode<Impl>::regStats() template<class Impl> void -DefaultDecode<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Decode, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) } template <class Impl> -void -DefaultDecode<Impl>::switchOut() +bool +DefaultDecode<Impl>::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. + cpu->signalDrained(); + return true; } template <class Impl> @@ -296,7 +297,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid && fromFetch->insts[i]->seqNum > inst->seqNum) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); } } @@ -345,7 +346,7 @@ DefaultDecode<Impl>::squash(unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); squash_count++; } } @@ -427,7 +428,7 @@ DefaultDecode<Impl>::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::DecodeIdx); + cpu->activateStage(O3CPU::DecodeIdx); } } else { // If it's not unblocking, then decode will not have any internal @@ -436,7 +437,7 @@ DefaultDecode<Impl>::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::DecodeIdx); + cpu->deactivateStage(O3CPU::DecodeIdx); } } } @@ -515,7 +516,7 @@ DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid); + DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. decodeStatus[tid] = Squashing; diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh index 3659b1a37..c19fd0abf 100644 --- a/src/cpu/o3/dep_graph.hh +++ b/src/cpu/o3/dep_graph.hh @@ -68,6 +68,8 @@ class DependencyGraph : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) { } + ~DependencyGraph(); + /** Resize the dependency graph to have num_entries registers. */ void resize(int num_entries); @@ -121,6 +123,12 @@ class DependencyGraph }; template <class DynInstPtr> +DependencyGraph<DynInstPtr>::~DependencyGraph() +{ + delete [] dependGraph; +} + +template <class DynInstPtr> void DependencyGraph<DynInstPtr>::resize(int num_entries) { diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 000000000..a2cdf2dba --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "arch/isa_specific.hh" + +#if THE_ISA == ALPHA_ISA +template <class Impl> +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#endif + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 5f52d0fca..d809b07e4 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" -template class DefaultFetch<AlphaSimpleImpl>; +template class DefaultFetch<O3CPUImpl>; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 76b32de68..931919af8 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_FETCH_HH__ @@ -35,12 +36,10 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch @@ -57,7 +56,7 @@ class DefaultFetch typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; /** Typedefs from the CPU policy. */ @@ -163,8 +162,11 @@ class DefaultFetch /** Registers statistics. */ void regStats(); + /** Returns the icache port. */ + Port *getIcachePort() { return icachePort; } + /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); @@ -181,11 +183,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + bool drain(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Resumes execution after a drain. */ + void resume(); + + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -296,8 +301,8 @@ class DefaultFetch int branchCount(); private: - /** Pointer to the FullCPU. */ - FullCPU *cpu; + /** Pointer to the O3CPU. */ + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; @@ -335,6 +340,15 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** Per-thread next Next PC. + * This is not a real register but is used for + * architectures that use a branch-delay slot. + * (such as MIPS or Sparc) + */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -390,6 +404,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; @@ -413,6 +433,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; @@ -421,6 +444,7 @@ class DefaultFetch Stats::Scalar<> icacheStallCycles; /** Stat for total number of fetched instructions. */ Stats::Scalar<> fetchedInsts; + /** Total number of fetched branches. */ Stats::Scalar<> fetchedBranches; /** Stat for total number of predicted branches. */ Stats::Scalar<> predictedBranches; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c0a2a5d09..4184e1867 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -26,8 +26,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "arch/utility.hh" #include "cpu/checker/cpu.hh" @@ -106,13 +109,12 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) fatal("numThreads is not a valid value\n"); - DPRINTF(Fetch, "Fetch constructor called\n"); - // Set fetch stage's status to inactive. _status = Inactive; @@ -125,6 +127,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Figure out fetch policy if (policy == "singlethread") { fetchPolicy = SingleThread; + if (numThreads > 1) + panic("Invalid Fetch Policy for a SMT workload."); } else if (policy == "roundrobin") { fetchPolicy = RoundRobin; DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); @@ -158,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -268,7 +274,7 @@ DefaultFetch<Impl>::regStats() template<class Impl> void -DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Fetch, "Setting the CPU pointer.\n"); cpu = cpu_ptr; @@ -276,13 +282,11 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); - Port *mem_dport = mem->getPort(""); - icachePort->setPeer(mem_dport); - mem_dport->setPeer(icachePort); - +#if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); } +#endif // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. @@ -330,6 +334,9 @@ DefaultFetch<Impl>::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); +#if THE_ISA != ALPHA_ISA + nextNPC[tid] = cpu->readNextNPC(tid); +#endif } } @@ -349,18 +356,22 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) ++fetchIcacheSquashes; delete pkt->req; delete pkt; - memReq[tid] = NULL; return; } - // Wake up the CPU (if it went to sleep and was waiting on this completion - // event). - cpu->wakeCPU(); + memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - switchToActive(); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + } // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { @@ -376,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } template <class Impl> +bool +DefaultFetch<Impl>::drain() +{ + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; + return true; +} + +template <class Impl> void -DefaultFetch<Impl>::switchOut() +DefaultFetch<Impl>::resume() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + drainPending = false; } template <class Impl> void -DefaultFetch<Impl>::doSwitchOut() +DefaultFetch<Impl>::switchOut() { + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -404,6 +424,9 @@ DefaultFetch<Impl>::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); +#if THE_ISA != ALPHA_ISA + nextNPC[i] = cpu->readNextNPC(i); +#endif fetchStatus[i] = Running; } numInst = 0; @@ -430,7 +453,7 @@ DefaultFetch<Impl>::switchToActive() if (_status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); _status = Active; } @@ -443,7 +466,7 @@ DefaultFetch<Impl>::switchToInactive() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); _status = Inactive; } @@ -488,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -499,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -530,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(cacheData[tid]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -549,7 +580,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid return false; } - DPRINTF(Fetch, "Doing cache access.\n"); + DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); lastIcacheStall[tid] = curTick; @@ -662,7 +693,7 @@ DefaultFetch<Impl>::updateFetchStatus() "completion\n",tid); } - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); } return Active; @@ -673,7 +704,7 @@ DefaultFetch<Impl>::updateFetchStatus() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); } return Inactive; @@ -714,12 +745,15 @@ DefaultFetch<Impl>::tick() // Reset the number of the instruction we're fetching. numInst = 0; +#if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } + if (fromCommit->commitInfo[0].clearInterrupt) { interruptPending = false; } +#endif for (threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { @@ -817,7 +851,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid); + DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. fetchStatus[tid] = Squashing; @@ -885,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ////////////////////////////////////////// int tid = getFetchingThread(fetchPolicy); - if (tid == -1) { + if (tid == -1 || drainPending) { DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); // Breaks looping condition in tick() @@ -893,6 +927,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) return; } + DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); + // The current PC. Addr &fetch_PC = PC[tid]; @@ -915,7 +951,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); if (!fetch_success) { - ++fetchMiscStallCycles; + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } return; } } else { @@ -984,11 +1024,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, next_PC, inst_seq, cpu); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " "[sn:%lli]\n", @@ -1020,7 +1060,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", + warn("cycle %lli: Quiesce instruction encountered, halting fetch!", curTick); fetchStatus[tid] = QuiescePending; ++numInst; @@ -1041,8 +1081,17 @@ DefaultFetch<Impl>::fetch(bool &status_change) if (fault == NoFault) { DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); +#if THE_ISA == ALPHA_ISA + PC[tid] = next_PC; + nextPC[tid] = next_PC + instSize; +#else PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; + nextPC[tid] = next_PC + instSize; + + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); +#endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) @@ -1065,11 +1114,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) next_PC, inst_seq, cpu); instruction->setPredTarg(next_PC + instSize); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); instruction->traceData = NULL; @@ -1085,9 +1134,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #endif // FULL_SYSTEM } } @@ -1256,6 +1305,6 @@ int DefaultFetch<Impl>::branchCount() { list<unsigned>::iterator threads = (*activeThreads).begin(); - + panic("Branch Count Fetch policy unimplemented\n"); return *threads; } diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc index 545deea9b..42e329aca 100644 --- a/src/cpu/o3/fu_pool.cc +++ b/src/cpu/o3/fu_pool.cc @@ -31,7 +31,7 @@ #include <sstream> #include "cpu/o3/fu_pool.hh" -#include "encumbered/cpu/full/fu_pool.hh" +#include "cpu/func_unit.hh" #include "sim/builder.hh" using namespace std; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 8145f4cc7..f99be7fe0 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -28,9 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" -template class DefaultIEW<AlphaSimpleImpl>; +template class DefaultIEW<O3CPUImpl>; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2e61af5fc..fb9afde54 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -68,7 +68,7 @@ class DefaultIEW //Typedefs from Impl typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; typedef typename CPUPol::IQ IQ; @@ -80,7 +80,7 @@ class DefaultIEW typedef typename CPUPol::RenameStruct RenameStruct; typedef typename CPUPol::IssueStruct IssueStruct; - friend class Impl::FullCPU; + friend class Impl::O3CPU; friend class CPUPol::IQ; public: @@ -125,8 +125,11 @@ class DefaultIEW /** Initializes stage; sends back the number of free IQ and LSQ entries. */ void initStage(); + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + /** Sets CPU pointer for IEW, IQ, and LSQ. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets main time buffer used for backwards communication. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -143,11 +146,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -204,6 +210,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#if DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -261,6 +306,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -328,7 +376,7 @@ class DefaultIEW private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** Records if IEW has written to the time buffer this cycle, so that the * CPU can deschedule itself if there is no activity. @@ -381,20 +429,12 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. - */ - unsigned executeWidth; - /** Index into queue of instructions being written back. */ unsigned wbNumInst; @@ -405,6 +445,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; @@ -439,14 +490,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -456,28 +499,25 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. */ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. */ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -490,7 +530,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3929f2e19..684ae2295 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -42,17 +42,17 @@ using namespace std; template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // @todo: Make this into a parameter. - issueToExecQueue(5, 5), + : issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -75,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -94,6 +98,7 @@ DefaultIEW<Impl>::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -139,20 +144,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -171,114 +162,105 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".EXEC:insts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".EXEC:loads") + .desc("Number of load instructions executed") + .flags(total); + + iewExecSquashedInsts + .name(name() + ".EXEC:squashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; -/* - for (int i=0; i<Num_OpClasses; ++i) { - stringstream subname; - subname << opClassStrings[i] << "_delay"; - issue_delay_dist.subname(i, subname.str()); - } -*/ - // - // Other stats - // + .flags(total); + iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; + + iewExecRate + .name(name() + ".EXEC:rate") + .desc("Inst execution rate") + .flags(total); + + iewExecRate = iewExecutedInsts / cpu->numCycles; iewInstsToCommit .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -299,7 +281,7 @@ DefaultIEW<Impl>::initStage() template<class Impl> void -DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(IEW, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -307,7 +289,7 @@ DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr) instQueue.setCPU(cpu_ptr); ldstQueue.setCPU(cpu_ptr); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template<class Impl> @@ -371,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr) } template <class Impl> +bool +DefaultIEW<Impl>::drain() +{ + // IEW is ready to drain at any time. + cpu->signalDrained(); + return true; +} + +template <class Impl> void -DefaultIEW<Impl>::switchOut() +DefaultIEW<Impl>::resume() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); } template <class Impl> void -DefaultIEW<Impl>::doSwitchOut() +DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; @@ -423,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -456,16 +445,7 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template<class Impl> @@ -591,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -611,7 +591,7 @@ DefaultIEW<Impl>::validInstsFromRename() unsigned inst_count = 0; for (int i=0; i<fromRename->size; i++) { - if (!fromRename->insts[i]->squashed) + if (!fromRename->insts[i]->isSquashed()) inst_count++; } @@ -799,10 +779,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -853,6 +835,22 @@ DefaultIEW<Impl>::sortInsts() template <class Impl> void +DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + +template <class Impl> +void DefaultIEW<Impl>::wakeCPU() { cpu->wakeCPU(); @@ -871,7 +869,7 @@ inline void DefaultIEW<Impl>::activateStage() { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template <class Impl> @@ -879,7 +877,7 @@ inline void DefaultIEW<Impl>::deactivateStage() { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::IEWIdx); + cpu->deactivateStage(O3CPU::IEWIdx); } template<class Impl> @@ -951,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1090,7 +1088,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1203,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1365,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } @@ -1501,9 +1502,9 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else - iewExecutedInsts++; + iewIewExecutedcutedInsts++; #else iewExecutedInsts++; #endif @@ -1512,13 +1513,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index f2c6b8213..a539066f9 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -28,9 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. -template class InstructionQueue<AlphaSimpleImpl>; +template class InstructionQueue<O3CPUImpl>; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 60a713020..4c69ca384 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ @@ -68,7 +69,7 @@ class InstructionQueue { public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; @@ -80,7 +81,7 @@ class InstructionQueue // Typedef of iterator through the list of instructions. typedef typename std::list<DynInstPtr>::iterator ListIt; - friend class Impl::FullCPU; + friend class Impl::O3CPU; /** FU completion event class. */ class FUCompletion : public Event { @@ -125,7 +126,7 @@ class InstructionQueue void resetState(); /** Sets CPU pointer. */ - void setCPU(FullCPU *_cpu) { cpu = _cpu; } + void setCPU(O3CPU *_cpu) { cpu = _cpu; } /** Sets active threads list. */ void setActiveThreads(std::list<unsigned> *at_ptr); @@ -252,7 +253,7 @@ class InstructionQueue ///////////////////////// /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Cache interface. */ MemInterface *dcacheInterface; @@ -474,12 +475,17 @@ class InstructionQueue /** Stat for number of non-speculative instructions removed due to a squash. */ Stats::Scalar<> iqSquashedNonSpecRemoved; + // Also include number of instructions rescheduled and replayed. - /** Distribution of number of instructions in the queue. */ + /** Distribution of number of instructions in the queue. + * @todo: Need to create struct to track the entry time for each + * instruction. */ Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; - /** Distribution of the cycles it takes to issue an instruction. */ + /** Distribution of the cycles it takes to issue an instruction. + * @todo: Need to create struct to track the ready time for each + * instruction. */ Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a @@ -492,8 +498,7 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; + /** Number of times the FU was busy. */ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 06a052c6f..36e0842be 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include <limits> @@ -125,7 +126,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) maxEntries[i] = part_amt; } - DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + DPRINTF(IQ, "IQ sharing policy set to Partitioned:" "%i entries per thread.\n",part_amt); } else if (policy == "threshold") { @@ -140,7 +141,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) maxEntries[i] = thresholdIQ; } - DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + DPRINTF(IQ, "IQ sharing policy set to Threshold:" "%i entries per thread.\n",thresholdIQ); } else { assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," @@ -289,22 +290,7 @@ InstructionQueue<Impl>::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") @@ -701,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -791,13 +778,14 @@ InstructionQueue<Impl>::scheduleReadyInsts() // complete. ++freeEntries; count[tid]--; - issuing_inst->removeInIQ(); + issuing_inst->clearInIQ(); } else { memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; @@ -1097,7 +1085,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid) // inst will flow through the rest of the pipeline. squashed_inst->setIssued(); squashed_inst->setCanCommit(); - squashed_inst->removeInIQ(); + squashed_inst->clearInIQ(); //Update Thread IQ Count count[squashed_inst->threadNumber]--; diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh new file mode 100755 index 000000000..f8a9dd8cc --- /dev/null +++ b/src/cpu/o3/isa_specific.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#include "cpu/base.hh" + +#if THE_ISA == ALPHA_ISA + #include "cpu/o3/alpha/cpu.hh" + #include "cpu/o3/alpha/impl.hh" + #include "cpu/o3/alpha/params.hh" + #include "cpu/o3/alpha/dyn_inst.hh" +#else + #error "O3CPU doesnt support this ISA" +#endif diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index de0325920..527947281 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -28,11 +28,9 @@ * Authors: Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQ<AlphaSimpleImpl>; +template class LSQ<O3CPUImpl>; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index bc4154c85..190734dc2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -44,7 +44,7 @@ template <class Impl> class LSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::LSQUnit LSQUnit; @@ -62,10 +62,20 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers statistics of each LSQ unit. */ + void regStats(); + + /** Returns dcache port. + * @todo: Dcache port needs to be moved up to this level for SMT + * to work. For now it just returns the port from one of the + * threads. + */ + Port *getDcachePort() { return &dcachePort; } + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr); /** Switches out the LSQ. */ @@ -248,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -264,7 +283,49 @@ class LSQ { template <class T> Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -272,7 +333,7 @@ class LSQ { LSQUnit thread[Impl::MaxThreads]; /** The CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** The IEW stage pointer. */ IEW *iewStage; @@ -293,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. */ + int retryTid; }; template <class Impl> diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 27aa0dc3c..4e3957029 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,9 +36,53 @@ using namespace std; template <class Impl> +Tick +LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template <class Impl> +bool +LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; +} + +template <class Impl> LSQ<Impl>::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); @@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -108,6 +153,16 @@ LSQ<Impl>::name() const template<class Impl> void +LSQ<Impl>::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template<class Impl> +void LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) { activeThreads = at_ptr; @@ -116,10 +171,12 @@ LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) template<class Impl> void -LSQ<Impl>::setCPU(FullCPU *cpu_ptr) +LSQ<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } @@ -492,6 +549,9 @@ LSQ<Impl>::hasStoresToWB() { list<unsigned>::iterator active_threads = (*activeThreads).begin(); + if ((*activeThreads).empty()) + return false; + while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; if (!hasStoresToWB(tid)) diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index e935ffa5c..3ca3fa667 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -29,11 +29,9 @@ * Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQUnit<AlphaSimpleImpl>; +template class LSQUnit<O3CPUImpl>; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index ce0cdd36f..a76a73f0c 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -40,7 +40,7 @@ #include "config/full_system.hh" #include "base/hashmap.hh" #include "cpu/inst_seq.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" /** @@ -61,9 +61,10 @@ class LSQUnit { typedef TheISA::IntReg IntReg; public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,19 +72,26 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. */ void switchOut(); @@ -125,11 +133,10 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ void completeDataAccess(PacketPtr pkt); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -204,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -214,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -232,59 +239,16 @@ class LSQUnit { private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; - - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - FullCPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(FullCPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); + /** Pointer to the LSQ. */ + LSQ *lsq; - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. */ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -443,25 +407,35 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of loads forwaded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; + + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Total number of squashed loads. */ + Stats::Scalar<> lsqSquashedLoads; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Total number of squashed stores. */ + Stats::Scalar<> lsqSquashedStores; + + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; + + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; + + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; + + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template <class T> @@ -517,8 +491,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->getFlags() & UNCACHEABLE && - (load_idx != loadHead || !load_inst->reachedCommit)) { + (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -598,7 +573,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // @todo: Need to make this a parameter. wb->schedule(curTick); - // Should keep track of stat for forwarded data + ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || (store_has_upper_limit && upper_load_has_store_part) || @@ -626,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. @@ -633,12 +609,13 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); + ++lsqBlockedLoads; return NoFault; } } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -646,9 +623,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -658,8 +632,19 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { + ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 6f32ec304..85b150cd9 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -29,11 +29,18 @@ * Korey Sewell */ -#include "cpu/checker/cpu.hh" +#include "config/use_checker.hh" + +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" +#include "mem/packet.hh" #include "mem/request.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + template<class Impl> LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, LSQUnit *lsq_ptr) @@ -71,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; @@ -89,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } template <class Impl> -Tick -LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template <class Impl> -bool -LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - -template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), @@ -138,13 +106,15 @@ LSQUnit<Impl>::LSQUnit() template<class Impl> void -LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, +LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). @@ -161,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -170,18 +138,15 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, template<class Impl> void -LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr) +LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); +#if USE_CHECKER if (cpu->checker) { cpu->checker->setDcachePort(dcachePort); } +#endif } template<class Impl> @@ -197,6 +162,47 @@ LSQUnit<Impl>::name() const template<class Impl> void +LSQUnit<Impl>::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + +template<class Impl> +void LSQUnit<Impl>::clearLQ() { loadQueue.clear(); @@ -542,7 +548,7 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -617,7 +623,7 @@ LSQUnit<Impl>::writebackStores() if (!dcachePort->sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. isStoreBlocked = true; - + ++lsqCacheBlocked; assert(retryPkt == NULL); retryPkt = data_pkt; } else { @@ -668,7 +674,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; + loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; --loads; @@ -676,6 +682,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) loadTail = load_idx; decrLdIdx(load_idx); + ++lsqSquashedLoads; } if (isLoadBlocked) { @@ -711,7 +718,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst->setSquashed(); storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; @@ -722,6 +729,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } @@ -744,9 +752,11 @@ LSQUnit<Impl>::storePostSend(Packet *pkt) // only works so long as the checker doesn't try to // verify the value in memory for stores. storeQueue[storeWBIdx].inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(storeQueue[storeWBIdx].inst); + cpu->checker->verify(storeQueue[storeWBIdx].inst); } +#endif } if (pkt->result != Packet::Success) { @@ -781,6 +791,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { assert(!inst->isStore()); + ++lsqIgnoredResponses; return; } @@ -839,9 +850,11 @@ LSQUnit<Impl>::completeStore(int store_idx) // Tell the checker we've completed this instruction. Some stores // may get reported twice to the checker, but the checker can // handle that case. +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(storeQueue[store_idx].inst); + cpu->checker->verify(storeQueue[store_idx].inst); } +#endif } template <class Impl> @@ -857,6 +870,8 @@ LSQUnit<Impl>::recvRetry() isStoreBlocked = false; } else { // Still blocked! + ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index a95103266..6a14dcbff 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -28,23 +28,22 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/store_set.hh" #include "cpu/o3/mem_dep_unit_impl.hh" // Force instantation of memory dependency unit using store sets and -// AlphaSimpleImpl. -template class MemDepUnit<StoreSet, AlphaSimpleImpl>; +// O3CPUImpl. +template class MemDepUnit<StoreSet, O3CPUImpl>; #ifdef DEBUG template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_count = 0; template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_insert = 0; template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_erase = 0; #endif diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/params.hh index f3cf36887..1c234bcd7 100644..100755 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/params.hh @@ -28,32 +28,28 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_ALPHA_PARAMS_HH__ -#define __CPU_O3_ALPHA_PARAMS_HH__ +#ifndef __CPU_O3_PARAMS_HH__ +#define __CPU_O3_PARAMS_HH__ #include "cpu/o3/cpu.hh" //Forward declarations -class AlphaDTB; -class AlphaITB; class FUPool; -class MemObject; -class Process; -class System; /** - * This file defines the parameters that will be used for the AlphaFullCPU. + * This file defines the parameters that will be used for the O3CPU. * This must be defined externally so that the Impl can have a params class * defined that it can pass to all of the individual stages. */ - -class AlphaSimpleParams : public BaseFullCPU::Params +class O3Params : public BaseO3CPU::Params { public: + unsigned activity; -#if FULL_SYSTEM - AlphaITB *itb; AlphaDTB *dtb; -#else + // + // Pointers to key objects + // +#if !FULL_SYSTEM std::vector<Process *> workload; Process *process; #endif // FULL_SYSTEM @@ -62,13 +58,11 @@ class AlphaSimpleParams : public BaseFullCPU::Params BaseCPU *checker; - unsigned activity; - // // Caches // -// MemInterface *icacheInterface; -// MemInterface *dcacheInterface; + // MemInterface *icacheInterface; + // MemInterface *dcacheInterface; unsigned cachePorts; @@ -104,12 +98,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -123,6 +115,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick fetchTrapLatency; // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + + // // Branch predictor (BP, BTB, RAS) // std::string predType; diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index ade5e4e56..b6677b4b1 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -72,7 +72,7 @@ class PhysRegFile // Will make these registers public for now, but they probably should // be private eventually with some accessor functions. public: - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** * Constructs a physical register file with the specified amount of @@ -86,10 +86,6 @@ class PhysRegFile //The duplication is unfortunate but it's better than having //different ways to access certain registers. - //Add these in later when everything else is in place -// void serialize(std::ostream &os); -// void unserialize(Checkpoint *cp, const std::string §ion); - /** Reads an integer register. */ uint64_t readIntReg(PhysRegIndex reg_idx) { @@ -278,11 +274,11 @@ class PhysRegFile private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + void setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; } /** Number of physical integer registers. */ unsigned numPhysicalIntRegs; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index 9ca8e82c6..443ada0cb 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" -template class DefaultRename<AlphaSimpleImpl>; +template class DefaultRename<O3CPUImpl>; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 42fdf6bf5..034087feb 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -55,7 +55,7 @@ class DefaultRename // Typedefs from the Impl. typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; // Typedefs from the CPUPol @@ -115,7 +115,7 @@ class DefaultRename void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -291,7 +294,7 @@ class DefaultRename std::list<RenameHistory> historyBuffer[Impl::MaxThreads]; /** Pointer to CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to main time buffer used for backwards communication. */ TimeBuffer<TimeStruct> *timeBuffer; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df33b98ee..805a72808 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -162,7 +162,7 @@ DefaultRename<Impl>::regStats() template <class Impl> void -DefaultRename<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Rename, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard) } template <class Impl> -void -DefaultRename<Impl>::switchOut() +bool +DefaultRename<Impl>::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); + return true; } template <class Impl> void -DefaultRename<Impl>::doSwitchOut() +DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { @@ -341,7 +342,7 @@ DefaultRename<Impl>::squash(unsigned tid) for (int i=0; i<fromDecode->size; i++) { if (fromDecode->insts[i]->threadNumber == tid) { - fromDecode->insts[i]->squashed = true; + fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } @@ -755,7 +756,7 @@ DefaultRename<Impl>::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::RenameIdx); + cpu->activateStage(O3CPU::RenameIdx); } } else { // If it's not unblocking, then rename will not have any internal @@ -764,7 +765,7 @@ DefaultRename<Impl>::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::RenameIdx); + cpu->deactivateStage(O3CPU::RenameIdx); } } } @@ -1022,7 +1023,7 @@ DefaultRename<Impl>::validInsts() unsigned inst_count = 0; for (int i=0; i<fromDecode->size; i++) { - if (!fromDecode->insts[i]->squashed) + if (!fromDecode->insts[i]->isSquashed()) inst_count++; } @@ -1206,7 +1207,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid) } DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." - " Adding to front of list.", tid); + " Adding to front of list.\n", tid); serializeInst[tid] = NULL; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index f99e5ccfd..9976049cd 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -29,9 +29,8 @@ * Nathan Binkert */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. -template class ROB<AlphaSimpleImpl>; +template class ROB<O3CPUImpl>; diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6d1402531..7cd5a5143 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_ROB_HH__ @@ -45,7 +46,7 @@ class ROB typedef TheISA::RegIndex RegIndex; public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo; @@ -90,7 +91,7 @@ class ROB * is created within. * @param cpu_ptr Pointer to the implementation specific full CPU object. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets pointer to the list of active threads. * @param at_ptr Pointer to the list of active threads. @@ -257,7 +258,7 @@ class ROB private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Active Threads in CPU */ std::list<unsigned>* activeThreads; @@ -307,7 +308,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. */ bool doneSquashing[Impl::MaxThreads]; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 97694e371..1b9f666b8 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" @@ -40,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -100,7 +101,7 @@ ROB<Impl>::name() const template <class Impl> void -ROB<Impl>::setCPU(FullCPU *cpu_ptr) +ROB<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; @@ -276,7 +277,7 @@ ROB<Impl>::retireHead(unsigned tid) --numInstsInROB; --threadEntries[tid]; - head_inst->removeInROB(); + head_inst->clearInROB(); head_inst->setCommitted(); instList[tid].erase(head_it); @@ -351,11 +352,11 @@ void ROB<Impl>::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -370,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -407,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid) // Check if ROB is done squashing. - if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -519,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); @@ -543,6 +544,7 @@ ROB<Impl>::readHeadInst() } } */ + template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst(unsigned tid) @@ -557,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid) return dummyInst; } } + /* template <class Impl> uint64_t diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh new file mode 100755 index 000000000..df8d1a6d8 --- /dev/null +++ b/src/cpu/o3/thread_context.hh @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_THREAD_CONTEXT_HH__ +#define __CPU_O3_THREAD_CONTEXT_HH__ + +#include "cpu/o3/isa_specific.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * Derived ThreadContext class for use with the O3CPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + * It must be defined specifically for the O3CPU because + * not all architectural state is located within the O3ThreadState + * (such as the commit PC, and registers), and specific actions + * must be taken when using this interface (such as squashing all + * in-flight instructions when doing a write to this interface). + */ +template <class Impl> +class O3ThreadContext : public ThreadContext +{ + public: + typedef typename Impl::O3CPU O3CPU; + + /** Pointer to the CPU. */ + O3CPU *cpu; + + /** Pointer to the thread state that this TC corrseponds to. */ + O3ThreadState<Impl> *thread; + + /** Returns a pointer to this CPU. */ + virtual BaseCPU *getCpuPtr() { return cpu; } + + /** Sets this CPU's ID. */ + virtual void setCpuId(int id) { cpu->setCpuId(id); } + + /** Reads this CPU's ID. */ + virtual int readCpuId() { return cpu->readCpuId(); } + +#if FULL_SYSTEM + /** Returns a pointer to the system. */ + virtual System *getSystemPtr() { return cpu->system; } + + /** Returns a pointer to physical memory. */ + virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + + /** Returns a pointer to this thread's kernel statistics. */ + virtual Kernel::Statistics *getKernelStats() + { return thread->kernelStats; } + + virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } + + virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); + + void delVirtPort(VirtualPort *vp); +#else + virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } + + /** Returns a pointer to this thread's process. */ + virtual Process *getProcessPtr() { return thread->getProcessPtr(); } +#endif + /** Returns this thread's status. */ + virtual Status status() const { return thread->status(); } + + /** Sets this thread's status. */ + virtual void setStatus(Status new_status) + { thread->setStatus(new_status); } + + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ + virtual void activate(int delay = 1); + + /** Set the status to Suspended. */ + virtual void suspend(); + + /** Set the status to Unallocated. */ + virtual void deallocate(int delay = 0); + + /** Set the status to Halted. */ + virtual void halt(); + +#if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ + virtual void dumpFuncProfile(); +#endif + /** Takes over execution of a thread from another CPU. */ + virtual void takeOverFrom(ThreadContext *old_context); + + /** Registers statistics associated with this TC. */ + virtual void regStats(const std::string &name); + + /** Serializes state. */ + virtual void serialize(std::ostream &os); + /** Unserializes state. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + /** Reads the last tick that this thread was activated on. */ + virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ + virtual Tick readLastSuspend(); + + /** Clears the function profiling information. */ + virtual void profileClear(); + /** Samples the function profiling information. */ + virtual void profileSample(); +#endif + /** Returns this thread's ID number. */ + virtual int getThreadNum() { return thread->readTid(); } + + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ + virtual TheISA::MachInst getInst(); + + /** Copies the architectural registers from another TC into this TC. */ + virtual void copyArchRegs(ThreadContext *tc); + + /** Resets all architectural registers to 0. */ + virtual void clearArchRegs(); + + /** Reads an integer register. */ + virtual uint64_t readIntReg(int reg_idx); + + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); + + virtual FloatRegBits readFloatRegBits(int reg_idx); + + /** Sets an integer register to a value. */ + virtual void setIntReg(int reg_idx, uint64_t val); + + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); + + /** Reads this thread's PC. */ + virtual uint64_t readPC() + { return cpu->readPC(thread->readTid()); } + + /** Sets this thread's PC. */ + virtual void setPC(uint64_t val); + + /** Reads this thread's next PC. */ + virtual uint64_t readNextPC() + { return cpu->readNextPC(thread->readTid()); } + + /** Sets this thread's next PC. */ + virtual void setNextPC(uint64_t val); + + /** Reads a miscellaneous register. */ + virtual MiscReg readMiscReg(int misc_reg) + { return cpu->readMiscReg(misc_reg, thread->readTid()); } + + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } + + /** Sets a misc. register. */ + virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Returns the number of consecutive store conditional failures. */ + // @todo: Figure out where these store cond failures should go. + virtual unsigned readStCondFailures() + { return thread->storeCondFailures; } + + /** Sets the number of consecutive store conditional failures. */ + virtual void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } + + // Only really makes sense for old CPU model. Lots of code + // outside the CPU still checks this function, so it will + // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ + virtual bool misspeculating() { return false; } + +#if !FULL_SYSTEM + /** Gets a syscall argument by index. */ + virtual IntReg getSyscallArg(int i); + + /** Sets a syscall argument. */ + virtual void setSyscallArg(int i, IntReg val); + + /** Sets the syscall return value. */ + virtual void setSyscallReturn(SyscallReturn return_value); + + /** Executes a syscall in SE mode. */ + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->readTid()); } + + /** Reads the funcExeInst counter. */ + virtual Counter readFuncExeInst() { return thread->funcExeInst; } +#endif +}; + +#endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh new file mode 100755 index 000000000..bf8cbf850 --- /dev/null +++ b/src/cpu/o3/thread_context_impl.hh @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/quiesce_event.hh" + +using namespace TheISA; + +#if FULL_SYSTEM +template <class Impl> +VirtualPort * +O3ThreadContext<Impl>::getVirtPort(ThreadContext *src_tc) +{ + if (!src_tc) + return thread->getVirtPort(); + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort("tc-vport", src_tc); + mem_port = cpu->system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +template <class Impl> +void +O3ThreadContext<Impl>::dumpFuncProfile() +{ + // Currently not supported +} +#endif + +template <class Impl> +void +O3ThreadContext<Impl>::takeOverFrom(ThreadContext *old_context) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); + +#if !FULL_SYSTEM + thread->funcExeInst = old_context->readFuncExeInst(); +#else + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's TC at this TC so that it wakes up + // the proper CPU. + other_quiesce->tc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->tc = this; + } + + // Transfer kernel stats from one CPU to the other. + thread->kernelStats = old_context->getKernelStats(); +// storeCondFailures = 0; + cpu->lockFlag = false; +#endif + + old_context->setStatus(ThreadContext::Unallocated); + + thread->inSyscall = false; + thread->trapPending = false; +} + +#if FULL_SYSTEM +template <class Impl> +void +O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + +template <class Impl> +void +O3ThreadContext<Impl>::activate(int delay) +{ + DPRINTF(O3CPU, "Calling activate on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Active) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; +#endif + + if (thread->status() == ThreadContext::Unallocated) { + cpu->activateWhenReady(thread->readTid()); + return; + } + + thread->setStatus(ThreadContext::Active); + + // status() == Suspended + cpu->activateContext(thread->readTid(), delay); +} + +template <class Impl> +void +O3ThreadContext<Impl>::suspend() +{ + DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Suspended) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; + thread->lastSuspend = curTick; +#endif +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ThreadContext::Active); + return; + } +#endif +*/ + thread->setStatus(ThreadContext::Suspended); + cpu->suspendContext(thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::deallocate(int delay) +{ + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Unallocated) + return; + + thread->setStatus(ThreadContext::Unallocated); + cpu->deallocateContext(thread->readTid(), delay); +} + +template <class Impl> +void +O3ThreadContext<Impl>::halt() +{ + DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Halted) + return; + + thread->setStatus(ThreadContext::Halted); + cpu->haltContext(thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::regStats(const std::string &name) +{ +#if FULL_SYSTEM + thread->kernelStats = new Kernel::Statistics(cpu->system); + thread->kernelStats->regStats(name + ".kern"); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::serialize(std::ostream &os) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->serialize(os); +#endif + +} + +template <class Impl> +void +O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->unserialize(cp, section); +#endif + +} + +#if FULL_SYSTEM +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastActivate() +{ + return thread->lastActivate; +} + +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastSuspend() +{ + return thread->lastSuspend; +} + +template <class Impl> +void +O3ThreadContext<Impl>::profileClear() +{} + +template <class Impl> +void +O3ThreadContext<Impl>::profileSample() +{} +#endif + +template <class Impl> +TheISA::MachInst +O3ThreadContext<Impl>:: getInst() +{ + return thread->getInst(); +} + +template <class Impl> +void +O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc) +{ + // This function will mess things up unless the ROB is empty and + // there are no instructions in the pipeline. + unsigned tid = thread->readTid(); + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i); + + DPRINTF(O3CPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, cpu->readIntReg(renamed_reg), + tc->readIntReg(i)); + + cpu->setIntReg(renamed_reg, tc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + TheISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + tc->readFloatRegBits(i)); + } + + // Copy the misc regs. + copyMiscRegs(tc, this); + + // Then finally set the PC and the next PC. + cpu->setPC(tc->readPC(), tid); + cpu->setNextPC(tc->readNextPC(), tid); +#if !FULL_SYSTEM + this->thread->funcExeInst = tc->readFuncExeInst(); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::clearArchRegs() +{} + +template <class Impl> +uint64_t +O3ThreadContext<Impl>::readIntReg(int reg_idx) +{ + return cpu->readArchIntReg(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatReg +O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width) +{ + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); + default: + panic("Unsupported width!"); + return 0; + } +} + +template <class Impl> +FloatReg +O3ThreadContext<Impl>::readFloatReg(int reg_idx) +{ + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatRegBits +O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the TC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatRegBits +O3ThreadContext<Impl>::readFloatRegBits(int reg_idx) +{ + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val) +{ + cpu->setArchIntReg(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val, int width) +{ + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); + break; + } + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val) +{ + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the TC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setPC(uint64_t val) +{ + cpu->setPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setNextPC(uint64_t val) +{ + cpu->setNextPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +Fault +O3ThreadContext<Impl>::setMiscReg(int misc_reg, const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +template <class Impl> +Fault +O3ThreadContext<Impl>::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, + thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +#if !FULL_SYSTEM + +template <class Impl> +TheISA::IntReg +O3ThreadContext<Impl>::getSyscallArg(int i) +{ + return cpu->getSyscallArg(i, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setSyscallArg(int i, IntReg val) +{ + cpu->setSyscallArg(i, val, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setSyscallReturn(SyscallReturn return_value) +{ + cpu->setSyscallReturn(return_value, thread->readTid()); +} + +#endif // FULL_SYSTEM + diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index b6535baa1..1c8105204 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -58,11 +58,11 @@ class Process; template <class Impl> struct O3ThreadState : public ThreadState { typedef ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Whether or not the thread is currently in syscall mode, and * thus able to be externally updated without squashing. @@ -75,14 +75,14 @@ struct O3ThreadState : public ThreadState { bool trapPending; #if FULL_SYSTEM - O3ThreadState(FullCPU *_cpu, int _thread_num) + O3ThreadState(O3CPU *_cpu, int _thread_num) : ThreadState(-1, _thread_num), inSyscall(0), trapPending(0) { } #else - O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid, + O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *mem) - : ThreadState(-1, _thread_num, mem, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { } #endif diff --git a/src/cpu/ozone/base_dyn_inst.cc b/src/cpu/ozone/base_dyn_inst.cc new file mode 100644 index 000000000..5a3a69dff --- /dev/null +++ b/src/cpu/ozone/base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +// Explicit instantiation +template class BaseDynInst<OzoneImpl>; + +template <> +int +BaseDynInst<OzoneImpl>::instcount = 0; diff --git a/src/cpu/ozone/bpred_unit.cc b/src/cpu/ozone/bpred_unit.cc new file mode 100644 index 000000000..c823f5e80 --- /dev/null +++ b/src/cpu/ozone/bpred_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/o3/bpred_unit_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +//#include "cpu/ozone/simple_impl.hh" + +template class BPredUnit<OzoneImpl>; +//template class BPredUnit<SimpleImpl>; diff --git a/src/cpu/checker/cpu_builder.cc b/src/cpu/ozone/checker_builder.cc index 3b7583294..c372e51d6 100644 --- a/src/cpu/checker/cpu_builder.cc +++ b/src/cpu/ozone/checker_builder.cc @@ -30,19 +30,24 @@ #include <string> -#include "cpu/checker/cpu.hh" +#include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" #include "cpu/ozone/dyn_inst.hh" #include "cpu/ozone/ozone_impl.hh" -#include "mem/base_mem.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" +class MemObject; + +template +class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >; + /** * Specific non-templated derived class used for SimObject configuration. */ -class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > > +class OzoneChecker : + public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > > { public: OzoneChecker(Params *p) @@ -64,7 +69,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<FunctionalMemory *> mem; SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; @@ -72,11 +76,10 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) SimObjectParam<Process *> workload; #endif // FULL_SYSTEM Param<int> clock; - SimObjectParam<BaseMem *> icache; - SimObjectParam<BaseMem *> dcache; Param<bool> defer_registration; Param<bool> exitOnError; + Param<bool> warnOnlyOnLoadError; Param<bool> function_trace; Param<Tick> function_trace_start; @@ -96,7 +99,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), @@ -105,11 +107,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), - INIT_PARAM(icache, "L1 instruction cache object"), - INIT_PARAM(dcache, "L1 data cache object"), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") @@ -126,6 +128,7 @@ CREATE_SIM_OBJECT(OzoneChecker) params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; @@ -137,13 +140,10 @@ CREATE_SIM_OBJECT(OzoneChecker) temp = max_insts_all_threads; temp = max_loads_any_thread; temp = max_loads_all_threads; - BaseMem *cache = icache; - cache = dcache; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->mem = mem; params->system = system; params->cpu_id = cpu_id; params->profile = profile; diff --git a/src/cpu/ozone/cpu.cc b/src/cpu/ozone/cpu.cc index 303c78eea..eb6ac37bd 100644 --- a/src/cpu/ozone/cpu.cc +++ b/src/cpu/ozone/cpu.cc @@ -31,7 +31,7 @@ #include "cpu/ozone/cpu_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" -template class OzoneCPU<SimpleImpl>; +//template class OzoneCPU<SimpleImpl>; template class OzoneCPU<OzoneImpl>; diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index e9550c39b..e411c12bd 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -43,6 +43,7 @@ #include "cpu/ozone/thread_state.hh" #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" +#include "mem/page_table.hh" #include "sim/eventq.hh" // forward declarations @@ -54,7 +55,6 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; -class Sampler; class RemoteGDB; class GDBListener; @@ -70,6 +70,7 @@ class Process; class Checkpoint; class EndQuiesceEvent; +class MemObject; class Request; namespace Trace { @@ -111,7 +112,7 @@ class OzoneCPU : public BaseCPU void setCpuId(int id); - int readCpuId() { return thread->cpuId; } + int readCpuId() { return thread->readCpuId(); } #if FULL_SYSTEM System *getSystemPtr() { return cpu->system; } @@ -122,22 +123,22 @@ class OzoneCPU : public BaseCPU AlphaDTB * getDTBPtr() { return cpu->dtb; } - Kernel::Statistics *getKernelStats() { return thread->kernelStats; } + Kernel::Statistics *getKernelStats() + { return thread->getKernelStats(); } FunctionalPort *getPhysPort() { return thread->getPhysPort(); } VirtualPort *getVirtPort(ThreadContext *tc = NULL) { return thread->getVirtPort(tc); } - void delVirtPort(VirtualPort *vp) - { thread->delVirtPort(vp); } + void delVirtPort(VirtualPort *vp); #else - TranslatingPort *getMemPort() { return thread->port; } + TranslatingPort *getMemPort() { return thread->getMemPort(); } - Process *getProcessPtr() { return thread->process; } + Process *getProcessPtr() { return thread->getProcessPtr(); } #endif - Status status() const { return thread->_status; } + Status status() const { return thread->status(); } void setStatus(Status new_status); @@ -149,7 +150,7 @@ class OzoneCPU : public BaseCPU void suspend(); /// Set the status to Unallocated. - void deallocate(); + void deallocate(int delay = 0); /// Set the status to Halted. void halt(); @@ -212,12 +213,11 @@ class OzoneCPU : public BaseCPU uint64_t readNextNPC() { - panic("Alpha has no NextNPC!"); return 0; } void setNextNPC(uint64_t val) - { panic("Alpha has no NextNPC!"); } + { } public: // ISA stuff: @@ -250,7 +250,7 @@ class OzoneCPU : public BaseCPU { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); } void setSyscallReturn(SyscallReturn return_value) - { cpu->setSyscallReturn(return_value, thread->tid); } + { cpu->setSyscallReturn(return_value, thread->readTid()); } Counter readFuncExeInst() { return thread->funcExeInst; } @@ -355,12 +355,10 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(Sampler *sampler); + void switchOut(); void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); - Sampler *sampler; - int switchCount; #if FULL_SYSTEM @@ -374,6 +372,10 @@ class OzoneCPU : public BaseCPU PhysicalMemory *physmem; #endif + virtual Port *getPort(const std::string &name, int idx); + + MemObject *mem; + FrontEnd *frontEnd; BackEnd *backEnd; @@ -383,7 +385,7 @@ class OzoneCPU : public BaseCPU virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); - virtual void deallocateContext(int thread_num); + virtual void deallocateContext(int thread_num, int delay); virtual void haltContext(int thread_num); // statistics @@ -415,50 +417,41 @@ class OzoneCPU : public BaseCPU #if FULL_SYSTEM - bool validInstAddr(Addr addr) { return true; } - bool validDataAddr(Addr addr) { return true; } - - Fault translateInstReq(Request *req) + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return itb->translate(req, tc); + return itb->translate(req, thread->getTC()); } - Fault translateDataReadReq(Request *req) + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return dtb->translate(req, tc, false); + return dtb->translate(req, thread->getTC(), false); } - Fault translateDataWriteReq(Request *req) + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return dtb->translate(req, tc, true); + return dtb->translate(req, thread->getTC(), true); } #else - bool validInstAddr(Addr addr) - { return true; } - - bool validDataAddr(Addr addr) - { return true; } - - int getInstAsid() { return thread.asid; } - int getDataAsid() { return thread.asid; } - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(Request *req) + Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return thread.translateInstReq(req); + return thread->getProcessPtr()->pTable->translate(req); } /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(Request *req) + Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return thread.translateDataReadReq(req); + return thread->getProcessPtr()->pTable->translate(req); } /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(Request *req) + Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return thread.translateDataWriteReq(req); + return thread->getProcessPtr()->pTable->translate(req); } #endif @@ -599,14 +592,14 @@ class OzoneCPU : public BaseCPU #if FULL_SYSTEM Fault hwrei(); - int readIntrFlag() { return thread.regs.intrflag; } - void setIntrFlag(int val) { thread.regs.intrflag = val; } + int readIntrFlag() { return thread.intrflag; } + void setIntrFlag(int val) { thread.intrflag = val; } bool inPalMode() { return AlphaISA::PcPAL(thread.PC); } bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); } bool simPalCheck(int palFunc); void processInterrupts(); #else - void syscall(); + void syscall(uint64_t &callnum); void setSyscallReturn(SyscallReturn return_value, int tid); #endif diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc index 18f257a25..e239b7a94 100644 --- a/src/cpu/ozone/cpu_builder.cc +++ b/src/cpu/ozone/cpu_builder.cc @@ -34,9 +34,7 @@ #include "cpu/inst_seq.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" #include "cpu/ozone/simple_params.hh" -#include "mem/cache/base_cache.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" @@ -49,14 +47,6 @@ class DerivOzoneCPU : public OzoneCPU<OzoneImpl> { } }; -class SimpleOzoneCPU : public OzoneCPU<SimpleImpl> -{ - public: - SimpleOzoneCPU(SimpleParams *p) - : OzoneCPU<SimpleImpl>(p) - { } -}; - //////////////////////////////////////////////////////////////////////// // @@ -78,7 +68,7 @@ SimObjectVectorParam<Process *> workload; //SimObjectParam<PageTable *> page_table; #endif // FULL_SYSTEM -SimObjectParam<FunctionalMemory *> mem; +SimObjectParam<MemObject *> mem; SimObjectParam<BaseCPU *> checker; @@ -87,8 +77,8 @@ Param<Counter> max_insts_all_threads; Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; -SimObjectParam<BaseCache *> icache; -SimObjectParam<BaseCache *> dcache; +//SimObjectParam<BaseCache *> icache; +//SimObjectParam<BaseCache *> dcache; Param<unsigned> cachePorts; Param<unsigned> width; @@ -215,8 +205,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) "count", 0), - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), +// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), +// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), INIT_PARAM_DFLT(width, "Width", 1), @@ -361,8 +351,8 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) // // Caches // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; +// params->icacheInterface = icache ? icache->getInterface() : NULL; +// params->dcacheInterface = dcache ? dcache->getInterface() : NULL; params->cachePorts = cachePorts; params->width = width; @@ -459,405 +449,3 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) } REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) - - - -//////////////////////////////////////////////////////////////////////// -// -// OzoneCPU Simulation Object -// - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - Param<int> clock; - Param<int> numThreads; - -#if FULL_SYSTEM -SimObjectParam<System *> system; -Param<int> cpu_id; -SimObjectParam<AlphaITB *> itb; -SimObjectParam<AlphaDTB *> dtb; -#else -SimObjectVectorParam<Process *> workload; -//SimObjectParam<PageTable *> page_table; -#endif // FULL_SYSTEM - -SimObjectParam<FunctionalMemory *> mem; - -SimObjectParam<BaseCPU *> checker; - -Param<Counter> max_insts_any_thread; -Param<Counter> max_insts_all_threads; -Param<Counter> max_loads_any_thread; -Param<Counter> max_loads_all_threads; - -SimObjectParam<BaseCache *> icache; -SimObjectParam<BaseCache *> dcache; - -Param<unsigned> cachePorts; -Param<unsigned> width; -Param<unsigned> frontEndWidth; -Param<unsigned> backEndWidth; -Param<unsigned> backEndSquashLatency; -Param<unsigned> backEndLatency; -Param<unsigned> maxInstBufferSize; -Param<unsigned> numPhysicalRegs; - -Param<unsigned> decodeToFetchDelay; -Param<unsigned> renameToFetchDelay; -Param<unsigned> iewToFetchDelay; -Param<unsigned> commitToFetchDelay; -Param<unsigned> fetchWidth; - -Param<unsigned> renameToDecodeDelay; -Param<unsigned> iewToDecodeDelay; -Param<unsigned> commitToDecodeDelay; -Param<unsigned> fetchToDecodeDelay; -Param<unsigned> decodeWidth; - -Param<unsigned> iewToRenameDelay; -Param<unsigned> commitToRenameDelay; -Param<unsigned> decodeToRenameDelay; -Param<unsigned> renameWidth; - -Param<unsigned> commitToIEWDelay; -Param<unsigned> renameToIEWDelay; -Param<unsigned> issueToExecuteDelay; -Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; - -Param<unsigned> iewToCommitDelay; -Param<unsigned> renameToROBDelay; -Param<unsigned> commitWidth; -Param<unsigned> squashWidth; - -Param<std::string> predType; -Param<unsigned> localPredictorSize; -Param<unsigned> localCtrBits; -Param<unsigned> localHistoryTableSize; -Param<unsigned> localHistoryBits; -Param<unsigned> globalPredictorSize; -Param<unsigned> globalCtrBits; -Param<unsigned> globalHistoryBits; -Param<unsigned> choicePredictorSize; -Param<unsigned> choiceCtrBits; - -Param<unsigned> BTBEntries; -Param<unsigned> BTBTagSize; - -Param<unsigned> RASSize; - -Param<unsigned> LQEntries; -Param<unsigned> SQEntries; -Param<unsigned> LFSTSize; -Param<unsigned> SSITSize; - -Param<unsigned> numPhysIntRegs; -Param<unsigned> numPhysFloatRegs; -Param<unsigned> numIQEntries; -Param<unsigned> numROBEntries; - -Param<bool> decoupledFrontEnd; -Param<int> dispatchWidth; -Param<int> wbWidth; - -Param<unsigned> smtNumFetchingThreads; -Param<std::string> smtFetchPolicy; -Param<std::string> smtLSQPolicy; -Param<unsigned> smtLSQThreshold; -Param<std::string> smtIQPolicy; -Param<unsigned> smtIQThreshold; -Param<std::string> smtROBPolicy; -Param<unsigned> smtROBThreshold; -Param<std::string> smtCommitPolicy; - -Param<unsigned> instShiftAmt; - -Param<bool> defer_registration; - -Param<bool> function_trace; -Param<Tick> function_trace_start; - -END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#if FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data translation buffer"), -#else - INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(checker, "Checker CPU", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - "this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), - INIT_PARAM_DFLT(width, "Width", 1), - INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), - INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), - INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), - INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), - INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), - INIT_PARAM(numPhysicalRegs, "Number of physical registers"), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), - INIT_PARAM(localPredictorSize, "Size of local predictor"), - INIT_PARAM(localCtrBits, "Bits per counter"), - INIT_PARAM(localHistoryTableSize, "Size of local history table"), - INIT_PARAM(localHistoryBits, "Bits for the local history"), - INIT_PARAM(globalPredictorSize, "Size of global predictor"), - INIT_PARAM(globalCtrBits, "Bits per counter"), - INIT_PARAM(globalHistoryBits, "Bits of history"), - INIT_PARAM(choicePredictorSize, "Size of choice predictor"), - INIT_PARAM(choiceCtrBits, "Bits of choice counters"), - - INIT_PARAM(BTBEntries, "Number of BTB entries"), - INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), - - INIT_PARAM(RASSize, "RAS size"), - - INIT_PARAM(LQEntries, "Number of load queue entries"), - INIT_PARAM(SQEntries, "Number of store queue entries"), - INIT_PARAM(LFSTSize, "Last fetched store table size"), - INIT_PARAM(SSITSize, "Store set ID table size"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, "Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), - INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), - INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), - - INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), - INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), - INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), - INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), - INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), - INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), - INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), - - INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - - INIT_PARAM(function_trace, "Enable function trace"), - INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -CREATE_SIM_OBJECT(SimpleOzoneCPU) -{ - SimpleOzoneCPU *cpu; - -#if FULL_SYSTEM - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - -#endif - - SimpleParams *params = new SimpleParams; - - params->clock = clock; - - params->name = getInstanceName(); - params->numberOfThreads = actual_num_threads; - -#if FULL_SYSTEM - params->system = system; - params->cpu_id = cpu_id; - params->itb = itb; - params->dtb = dtb; -#else - params->workload = workload; -// params->pTable = page_table; -#endif // FULL_SYSTEM - - params->mem = mem; - params->checker = checker; - params->max_insts_any_thread = max_insts_any_thread; - params->max_insts_all_threads = max_insts_all_threads; - params->max_loads_any_thread = max_loads_any_thread; - params->max_loads_all_threads = max_loads_all_threads; - - // - // Caches - // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; - params->cachePorts = cachePorts; - - params->width = width; - params->frontEndWidth = frontEndWidth; - params->backEndWidth = backEndWidth; - params->backEndSquashLatency = backEndSquashLatency; - params->backEndLatency = backEndLatency; - params->maxInstBufferSize = maxInstBufferSize; - params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; - - params->decodeToFetchDelay = decodeToFetchDelay; - params->renameToFetchDelay = renameToFetchDelay; - params->iewToFetchDelay = iewToFetchDelay; - params->commitToFetchDelay = commitToFetchDelay; - params->fetchWidth = fetchWidth; - - params->renameToDecodeDelay = renameToDecodeDelay; - params->iewToDecodeDelay = iewToDecodeDelay; - params->commitToDecodeDelay = commitToDecodeDelay; - params->fetchToDecodeDelay = fetchToDecodeDelay; - params->decodeWidth = decodeWidth; - - params->iewToRenameDelay = iewToRenameDelay; - params->commitToRenameDelay = commitToRenameDelay; - params->decodeToRenameDelay = decodeToRenameDelay; - params->renameWidth = renameWidth; - - params->commitToIEWDelay = commitToIEWDelay; - params->renameToIEWDelay = renameToIEWDelay; - params->issueToExecuteDelay = issueToExecuteDelay; - params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; - - params->iewToCommitDelay = iewToCommitDelay; - params->renameToROBDelay = renameToROBDelay; - params->commitWidth = commitWidth; - params->squashWidth = squashWidth; - - params->predType = predType; - params->localPredictorSize = localPredictorSize; - params->localCtrBits = localCtrBits; - params->localHistoryTableSize = localHistoryTableSize; - params->localHistoryBits = localHistoryBits; - params->globalPredictorSize = globalPredictorSize; - params->globalCtrBits = globalCtrBits; - params->globalHistoryBits = globalHistoryBits; - params->choicePredictorSize = choicePredictorSize; - params->choiceCtrBits = choiceCtrBits; - - params->BTBEntries = BTBEntries; - params->BTBTagSize = BTBTagSize; - - params->RASSize = RASSize; - - params->LQEntries = LQEntries; - params->SQEntries = SQEntries; - - params->SSITSize = SSITSize; - params->LFSTSize = LFSTSize; - - params->numPhysIntRegs = numPhysIntRegs; - params->numPhysFloatRegs = numPhysFloatRegs; - params->numIQEntries = numIQEntries; - params->numROBEntries = numROBEntries; - - params->decoupledFrontEnd = decoupledFrontEnd; - params->dispatchWidth = dispatchWidth; - params->wbWidth = wbWidth; - - params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; - params->smtIQPolicy = smtIQPolicy; - params->smtLSQPolicy = smtLSQPolicy; - params->smtLSQThreshold = smtLSQThreshold; - params->smtROBPolicy = smtROBPolicy; - params->smtROBThreshold = smtROBThreshold; - params->smtCommitPolicy = smtCommitPolicy; - - params->instShiftAmt = 2; - - params->deferRegistration = defer_registration; - - params->functionTrace = function_trace; - params->functionTraceStart = function_trace_start; - - cpu = new SimpleOzoneCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) - diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 76e2318aa..f58b81990 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -29,21 +29,17 @@ * Nathan Binkert */ -//#include <cstdio> -//#include <cstdlib> +#include "config/full_system.hh" +#include "config/use_checker.hh" #include "arch/isa_traits.hh" // For MachInst #include "base/trace.hh" -#include "config/full_system.hh" #include "cpu/base.hh" -#include "cpu/checker/thread_context.hh" #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/cpu.hh" #include "cpu/quiesce_event.hh" #include "cpu/static_inst.hh" -//#include "mem/base_mem.hh" -#include "mem/mem_interface.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -56,17 +52,18 @@ //#include "base/remote_gdb.hh" #include "cpu/profile.hh" #include "kern/kernel_stats.hh" -#include "mem/functional/memory_control.hh" -#include "mem/functional/physical.hh" #include "sim/faults.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" #include "sim/system.hh" #else // !FULL_SYSTEM -#include "mem/functional/functional.hh" #include "sim/process.hh" #endif // FULL_SYSTEM +#if USE_CHECKER +#include "cpu/checker/thread_context.hh" +#endif + using namespace TheISA; template <class Impl> @@ -101,13 +98,12 @@ OzoneCPU<Impl>::TickEvent::description() template <class Impl> OzoneCPU<Impl>::OzoneCPU(Params *p) #if FULL_SYSTEM - : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), - mem(p->mem), + : BaseCPU(p), thread(this, 0), tickEvent(this, p->width), #else - : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), - mem(p->workload[0]->getMemory()), + : BaseCPU(p), thread(this, 0, p->workload[0], 0, p->mem), + tickEvent(this, p->width), #endif - comm(5, 5) + mem(p->mem), comm(5, 5) { frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); @@ -115,6 +111,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) _status = Idle; if (p->checker) { +#if USE_CHECKER BaseCPU *temp_checker = p->checker; checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); checker->setMemory(mem); @@ -123,7 +120,10 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) #endif checkerTC = new CheckerThreadContext<OzoneTC>(&ozoneTC, checker); thread.tc = checkerTC; - tc = checkerXC; + tc = checkerTC; +#else + panic("Checker enabled but not compiled in!"); +#endif } else { checker = NULL; thread.tc = &ozoneTC; @@ -139,15 +139,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) #if FULL_SYSTEM /***** All thread state stuff *****/ thread.cpu = this; - thread.tid = 0; - thread.mem = p->mem; + thread.setTid(0); thread.quiesceEvent = new EndQuiesceEvent(tc); system = p->system; itb = p->itb; dtb = p->dtb; - memctrl = p->system->memctrl; physmem = p->system->physmem; if (p->profile) { @@ -166,9 +164,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) thread.profilePC = 3; #else thread.cpu = this; - thread.tid = 0; - thread.process = p->workload[0]; - thread.asid = 0; #endif // !FULL_SYSTEM numInst = 0; @@ -206,7 +201,35 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) backEnd->renameTable.copyFrom(thread.renameTable); #if !FULL_SYSTEM -// pTable = p->pTable; + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), 0), + p->workload[0]->pTable, + false); + mem_port = p->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + thread.setMemPort(trans_port); +#else + Port *mem_port; + FunctionalPort *phys_port; + VirtualPort *virt_port; + phys_port = new FunctionalPort(csprintf("%s-%d-funcport", + name(), 0)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(phys_port); + phys_port->setPeer(mem_port); + + virt_port = new VirtualPort(csprintf("%s-%d-vport", + name(), 0)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(virt_port); + virt_port->setPeer(mem_port); + + thread.setPhysPort(phys_port); + thread.setVirtPort(virt_port); #endif lockFlag = 0; @@ -221,9 +244,8 @@ OzoneCPU<Impl>::~OzoneCPU() template <class Impl> void -OzoneCPU<Impl>::switchOut(Sampler *_sampler) +OzoneCPU<Impl>::switchOut() { - sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. backEnd->switchOut(); @@ -237,12 +259,14 @@ OzoneCPU<Impl>::signalSwitched() if (++switchCount == 2) { backEnd->doSwitchOut(); frontEnd->doSwitchOut(); +#if USE_CHECKER if (checker) - checker->switchOut(sampler); + checker->switchOut(); +#endif + _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); } assert(switchCount <= 2); } @@ -291,7 +315,7 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay) notIdleFraction++; scheduleTickEvent(delay); _status = Running; - thread._status = ThreadContext::Active; + thread.setStatus(ThreadContext::Active); frontEnd->wakeFromQuiesce(); } @@ -311,7 +335,7 @@ OzoneCPU<Impl>::suspendContext(int thread_num) template <class Impl> void -OzoneCPU<Impl>::deallocateContext(int thread_num) +OzoneCPU<Impl>::deallocateContext(int thread_num, int delay) { // for now, these are equivalent suspendContext(thread_num); @@ -395,6 +419,18 @@ OzoneCPU<Impl>::init() } template <class Impl> +Port * +OzoneCPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return backEnd->getDcachePort(); + else if (if_name == "icache_port") + return frontEnd->getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void OzoneCPU<Impl>::serialize(std::ostream &os) { @@ -510,7 +546,7 @@ template <class Impl> Addr OzoneCPU<Impl>::dbg_vtophys(Addr addr) { - return vtophys(tcProxy, addr); + return vtophys(tc, addr); } #endif // FULL_SYSTEM @@ -526,7 +562,7 @@ OzoneCPU<Impl>::post_interrupt(int int_num, int index) // thread.activate(); // Hack for now. Otherwise might have to go through the tc, or // I need to figure out what's the right thing to call. - activateContext(thread.tid, 1); + activateContext(thread.readTid(), 1); } } #endif // FULL_SYSTEM @@ -565,7 +601,7 @@ OzoneCPU<Impl>::squashFromTC() #if !FULL_SYSTEM template <class Impl> void -OzoneCPU<Impl>::syscall() +OzoneCPU<Impl>::syscall(uint64_t &callnum) { // Not sure this copy is needed, depending on how the TC proxy is made. thread.renameTable.copyFrom(backEnd->renameTable); @@ -576,7 +612,7 @@ OzoneCPU<Impl>::syscall() DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); - thread.process->syscall(yc); + thread.process->syscall(callnum, tc); thread.funcExeInst--; @@ -690,9 +726,9 @@ OzoneCPU<Impl>::simPalCheck(int palFunc) switch (palFunc) { case PAL::halt: - haltContext(thread.tid); + haltContext(thread.readTid()); if (--System::numSystemsRunning == 0) - new SimExitEvent("all cpus halted"); + exitSimLoop("all cpus halted"); break; case PAL::bpt: @@ -718,21 +754,31 @@ void OzoneCPU<Impl>::OzoneTC::setCpuId(int id) { cpu->cpuId = id; - thread->cpuId = id; + thread->setCpuId(id); } +#if FULL_SYSTEM +template <class Impl> +void +OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + template <class Impl> void OzoneCPU<Impl>::OzoneTC::setStatus(Status new_status) { - thread->_status = new_status; + thread->setStatus(new_status); } template <class Impl> void OzoneCPU<Impl>::OzoneTC::activate(int delay) { - cpu->activateContext(thread->tid, delay); + cpu->activateContext(thread->readTid(), delay); } /// Set the status to Suspended. @@ -740,15 +786,15 @@ template <class Impl> void OzoneCPU<Impl>::OzoneTC::suspend() { - cpu->suspendContext(thread->tid); + cpu->suspendContext(thread->readTid()); } /// Set the status to Unallocated. template <class Impl> void -OzoneCPU<Impl>::OzoneTC::deallocate() +OzoneCPU<Impl>::OzoneTC::deallocate(int delay) { - cpu->deallocateContext(thread->tid); + cpu->deallocateContext(thread->readTid(), delay); } /// Set the status to Halted. @@ -756,7 +802,7 @@ template <class Impl> void OzoneCPU<Impl>::OzoneTC::halt() { - cpu->haltContext(thread->tid); + cpu->haltContext(thread->readTid()); } #if FULL_SYSTEM @@ -771,7 +817,6 @@ void OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context) { // some things should already be set up - assert(getMemPtr() == old_context->getMemPtr()); #if FULL_SYSTEM assert(getSystemPtr() == old_context->getSystemPtr()); #else @@ -867,7 +912,7 @@ template <class Impl> int OzoneCPU<Impl>::OzoneTC::getThreadNum() { - return thread->tid; + return thread->readTid(); } // Also somewhat obnoxious. Really only used for the TLB fault. @@ -875,7 +920,7 @@ template <class Impl> TheISA::MachInst OzoneCPU<Impl>::OzoneTC::getInst() { - return thread->inst; + return thread->getInst(); } template <class Impl> @@ -894,7 +939,7 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc) } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { int fp_idx = i - TheISA::FP_Base_DepTag; thread->renameTable[i]->setDoubleResult( - tc->readFloatRegDouble(fp_idx)); + tc->readFloatReg(fp_idx, 64)); } } @@ -904,7 +949,7 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc) // Need to copy the TC values into the current rename table, // copy the misc regs. - thread->regs.miscRegs.copyMiscRegs(tc); + copyMiscRegs(tc, this); } template <class Impl> @@ -922,7 +967,7 @@ OzoneCPU<Impl>::OzoneTC::readIntReg(int reg_idx) } template <class Impl> -float +TheISA::FloatReg OzoneCPU<Impl>::OzoneTC::readFloatReg(int reg_idx, int width) { int idx = reg_idx + TheISA::FP_Base_DepTag; @@ -1049,15 +1094,15 @@ template <class Impl> TheISA::MiscReg OzoneCPU<Impl>::OzoneTC::readMiscReg(int misc_reg) { - return thread->regs.miscRegs.readReg(misc_reg); + return thread->miscRegFile.readReg(misc_reg); } template <class Impl> TheISA::MiscReg OzoneCPU<Impl>::OzoneTC::readMiscRegWithEffect(int misc_reg, Fault &fault) { - return thread->regs.miscRegs.readRegWithEffect(misc_reg, - fault, this); + return thread->miscRegFile.readRegWithEffect(misc_reg, + fault, this); } template <class Impl> @@ -1065,7 +1110,7 @@ Fault OzoneCPU<Impl>::OzoneTC::setMiscReg(int misc_reg, const MiscReg &val) { // Needs to setup a squash event unless we're in syscall mode - Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val); + Fault ret_fault = thread->miscRegFile.setReg(misc_reg, val); if (!thread->inSyscall) { cpu->squashFromTC(); @@ -1079,8 +1124,8 @@ Fault OzoneCPU<Impl>::OzoneTC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) { // Needs to setup a squash event unless we're in syscall mode - Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val, - this); + Fault ret_fault = thread->miscRegFile.setRegWithEffect(misc_reg, val, + this); if (!thread->inSyscall) { cpu->squashFromTC(); diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh index 0bb50bd69..67691d416 100644 --- a/src/cpu/ozone/dyn_inst.hh +++ b/src/cpu/ozone/dyn_inst.hh @@ -34,9 +34,8 @@ #include "arch/isa_traits.hh" #include "config/full_system.hh" #include "cpu/base_dyn_inst.hh" -#include "cpu/ozone/cpu.hh" // MUST include this #include "cpu/inst_seq.hh" -//#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this +#include "cpu/ozone/cpu.hh" // MUST include this #include "cpu/ozone/ozone_impl.hh" #include <list> @@ -47,15 +46,17 @@ class OzoneDynInst : public BaseDynInst<Impl> { public: // Typedefs - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; - typedef typename FullCPU::ImplState ImplState; + typedef typename OzoneCPU::ImplState ImplState; // Typedef for DynInstPtr. This is really just a RefCountingPtr<OoODynInst>. typedef typename Impl::DynInstPtr DynInstPtr; typedef TheISA::ExtMachInst ExtMachInst; typedef TheISA::MachInst MachInst; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; typedef typename std::list<DynInstPtr>::iterator ListIt; @@ -67,10 +68,10 @@ class OzoneDynInst : public BaseDynInst<Impl> MaxInstDestRegs = TheISA::MaxInstDestRegs }; - OzoneDynInst(FullCPU *cpu); + OzoneDynInst(OzoneCPU *cpu); OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu); + InstSeqNum seq_num, OzoneCPU *cpu); OzoneDynInst(StaticInstPtr inst); @@ -131,7 +132,7 @@ class OzoneDynInst : public BaseDynInst<Impl> Fault initiateAcc(); - Fault completeAcc(); + Fault completeAcc(Packet *pkt); // The register accessor methods provide the index of the // instruction's operand (e.g., 0 or 1), not the architectural @@ -149,17 +150,30 @@ class OzoneDynInst : public BaseDynInst<Impl> return srcInsts[idx]->readIntResult(); } - float readFloatRegSingle(const StaticInst *si, int idx) + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + switch(width) { + case 32: + return srcInsts[idx]->readFloatResult(); + case 64: + return srcInsts[idx]->readDoubleResult(); + default: + panic("Width not supported"); + return 0; + } + } + + FloatReg readFloatReg(const StaticInst *si, int idx) { return srcInsts[idx]->readFloatResult(); } - double readFloatRegDouble(const StaticInst *si, int idx) + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) { - return srcInsts[idx]->readDoubleResult(); + return srcInsts[idx]->readIntResult(); } - uint64_t readFloatRegInt(const StaticInst *si, int idx) + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) { return srcInsts[idx]->readIntResult(); } @@ -172,19 +186,25 @@ class OzoneDynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setIntReg(si, idx, val); } - void setFloatRegSingle(const StaticInst *si, int idx, float val) + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + BaseDynInst<Impl>::setFloatReg(si, idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) { - BaseDynInst<Impl>::setFloatRegSingle(si, idx, val); + BaseDynInst<Impl>::setFloatReg(si, idx, val); } - void setFloatRegDouble(const StaticInst *si, int idx, double val) + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) { - BaseDynInst<Impl>::setFloatRegDouble(si, idx, val); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); } - void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) { - BaseDynInst<Impl>::setFloatRegInt(si, idx, val); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); } void setIntResult(uint64_t result) { this->instResult.integer = result; } @@ -223,7 +243,7 @@ class OzoneDynInst : public BaseDynInst<Impl> void trap(Fault fault); bool simPalCheck(int palFunc); #else - void syscall(); + void syscall(uint64_t &callnum); #endif ListIt iqIt; diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh index 4149bf144..bad902c2a 100644 --- a/src/cpu/ozone/dyn_inst_impl.hh +++ b/src/cpu/ozone/dyn_inst_impl.hh @@ -37,7 +37,7 @@ using namespace TheISA; template <class Impl> -OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu) +OzoneDynInst<Impl>::OzoneDynInst(OzoneCPU *cpu) : BaseDynInst<Impl>(0, 0, 0, 0, cpu) { this->setResultReady(); @@ -47,7 +47,7 @@ OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu) template <class Impl> OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, OzoneCPU *cpu) : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) { initInstPtrs(); @@ -111,19 +111,9 @@ OzoneDynInst<Impl>::initiateAcc() template <class Impl> Fault -OzoneDynInst<Impl>::completeAcc() +OzoneDynInst<Impl>::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(this->req->data, - this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, - this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } @@ -298,7 +288,7 @@ template <class Impl> void OzoneDynInst<Impl>::trap(Fault fault) { - fault->invoke(this->thread->getXCProxy()); + fault->invoke(this->thread->getTC()); } template <class Impl> @@ -310,8 +300,8 @@ OzoneDynInst<Impl>::simPalCheck(int palFunc) #else template <class Impl> void -OzoneDynInst<Impl>::syscall() +OzoneDynInst<Impl>::syscall(uint64_t &callnum) { - this->cpu->syscall(); + this->cpu->syscall(callnum); } #endif diff --git a/src/cpu/ozone/front_end.cc b/src/cpu/ozone/front_end.cc index f0ea8eae1..cfd033564 100644 --- a/src/cpu/ozone/front_end.cc +++ b/src/cpu/ozone/front_end.cc @@ -30,7 +30,7 @@ #include "cpu/ozone/front_end_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class FrontEnd<OzoneImpl>; -template class FrontEnd<SimpleImpl>; +//template class FrontEnd<SimpleImpl>; diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index af190008c..3ed3c4d18 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -33,15 +33,17 @@ #include <deque> +#include "arch/utility.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/bpred_unit.hh" #include "cpu/ozone/rename_table.hh" +#include "mem/port.hh" #include "mem/request.hh" #include "sim/eventq.hh" #include "sim/stats.hh" class ThreadContext; -class MemInterface; +class MemObject; template <class> class OzoneThreadState; class PageTable; @@ -55,18 +57,55 @@ class FrontEnd typedef typename Impl::Params Params; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::CPUType CPUType; typedef typename Impl::BackEnd BackEnd; - typedef typename Impl::FullCPU::OzoneTC OzoneTC; - typedef typename Impl::FullCPU::CommStruct CommStruct; + typedef typename Impl::CPUType::OzoneTC OzoneTC; + typedef typename Impl::CPUType::CommStruct CommStruct; + + /** IcachePort class. Handles doing the communication with the + * cache/memory. + */ + class IcachePort : public Port + { + protected: + /** Pointer to FE. */ + FrontEnd<Impl> *fe; + + public: + /** Default constructor. */ + IcachePort(FrontEnd<Impl> *_fe) + : fe(_fe) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles setting fetch to the + * proper status to start fetching. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of a failed fetch. */ + virtual void recvRetry(); + }; FrontEnd(Params *params); std::string name() const; - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(CPUType *cpu_ptr); void setBackEnd(BackEnd *back_end_ptr) { backEnd = back_end_ptr; } @@ -80,6 +119,8 @@ class FrontEnd void regStats(); + Port *getIcachePort() { return &icachePort; } + void tick(); Fault fetchCacheLine(); void processInst(DynInstPtr &inst); @@ -104,6 +145,8 @@ class FrontEnd bool switchedOut; private: + void recvRetry(); + bool updateStatus(); void checkBE(); @@ -130,7 +173,7 @@ class FrontEnd { return cpu->globalSeqNum++; } public: - FullCPU *cpu; + CPUType *cpu; BackEnd *backEnd; @@ -141,8 +184,9 @@ class FrontEnd enum Status { Running, Idle, - IcacheMissStall, - IcacheMissComplete, + IcacheWaitResponse, + IcacheWaitRetry, + IcacheAccessComplete, SerializeBlocked, SerializeComplete, RenameBlocked, @@ -161,37 +205,9 @@ class FrontEnd BranchPred branchPred; - class IcachePort : public Port - { - protected: - FrontEnd *fe; - - public: - IcachePort(const std::string &_name, FrontEnd *_fe) - : Port(_name), fe(_fe) - { } - - protected: - virtual Tick recvAtomic(PacketPtr pkt); - - virtual void recvFunctional(PacketPtr pkt); - - virtual void recvStatusChange(Status status); - - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - virtual bool recvTiming(PacketPtr pkt); - - virtual void recvRetry(); - }; - IcachePort icachePort; -#if !FULL_SYSTEM - PageTable *pTable; -#endif + MemObject *mem; RequestPtr memReq; @@ -209,6 +225,11 @@ class FrontEnd bool cacheBlkValid; + bool cacheBlocked; + + /** The packet that is waiting to be retried. */ + PacketPtr retryPkt; + public: RenameTable<Impl> renameTable; diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 467567c10..9da937320 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -28,21 +28,69 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/faults.hh" #include "arch/isa_traits.hh" #include "base/statistics.hh" #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/front_end.hh" -#include "mem/mem_interface.hh" -#include "sim/byte_swap.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "mem/request.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif using namespace TheISA; +template<class Impl> +Tick +FrontEnd<Impl>::IcachePort::recvAtomic(PacketPtr pkt) +{ + panic("FrontEnd doesn't expect recvAtomic callback!"); + return curTick; +} + +template<class Impl> +void +FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt) +{ + panic("FrontEnd doesn't expect recvFunctional callback!"); +} + +template<class Impl> +void +FrontEnd<Impl>::IcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("FrontEnd doesn't expect recvStatusChange callback!"); +} + +template<class Impl> +bool +FrontEnd<Impl>::IcachePort::recvTiming(Packet *pkt) +{ + fe->processCacheCompletion(pkt); + return true; +} + +template<class Impl> +void +FrontEnd<Impl>::IcachePort::recvRetry() +{ + fe->recvRetry(); +} + template <class Impl> FrontEnd<Impl>::FrontEnd(Params *params) : branchPred(params), - icacheInterface(params->icacheInterface), + icachePort(this), + mem(params->mem), instBufferSize(0), maxInstBufferSize(params->maxInstBufferSize), width(params->frontEndWidth), @@ -57,7 +105,7 @@ FrontEnd<Impl>::FrontEnd(Params *params) memReq = NULL; // Size of cache block. - cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + cacheBlkSize = 64; assert(isPowerOf2(cacheBlkSize)); @@ -69,11 +117,10 @@ FrontEnd<Impl>::FrontEnd(Params *params) fetchCacheLineNextCycle = true; - cacheBlkValid = false; + cacheBlkValid = cacheBlocked = false; + + retryPkt = NULL; -#if !FULL_SYSTEM -// pTable = params->pTable; -#endif fetchFault = NoFault; } @@ -86,6 +133,21 @@ FrontEnd<Impl>::name() const template <class Impl> void +FrontEnd<Impl>::setCPU(CPUType *cpu_ptr) +{ + cpu = cpu_ptr; + + icachePort.setName(this->name() + "-iport"); + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setIcachePort(&icachePort); + } +#endif +} + +template <class Impl> +void FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm) { comm = _comm; @@ -272,7 +334,7 @@ FrontEnd<Impl>::tick() IFQFcount += instBufferSize == maxInstBufferSize; // Fetch cache line - if (status == IcacheMissComplete) { + if (status == IcacheAccessComplete) { cacheBlkValid = true; status = Running; @@ -281,8 +343,8 @@ FrontEnd<Impl>::tick() if (freeRegs <= 0) status = RenameBlocked; checkBE(); - } else if (status == IcacheMissStall) { - DPRINTF(FE, "Still in Icache miss stall.\n"); + } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) { + DPRINTF(FE, "Still in Icache wait.\n"); icacheStallCycles++; return; } @@ -303,7 +365,7 @@ FrontEnd<Impl>::tick() } else if (status == QuiescePending) { DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); return; - } else if (status != IcacheMissComplete) { + } else if (status != IcacheAccessComplete) { if (fetchCacheLineNextCycle) { Fault fault = fetchCacheLine(); if (fault != NoFault) { @@ -314,7 +376,7 @@ FrontEnd<Impl>::tick() fetchCacheLineNextCycle = false; } // If miss, stall until it returns. - if (status == IcacheMissStall) { + if (status == IcacheWaitResponse || status == IcacheWaitRetry) { // Tell CPU to not tick me for now. return; } @@ -404,22 +466,16 @@ FrontEnd<Impl>::fetchCacheLine() // Setup the memReq to do a read of the first isntruction's address. // Set the appropriate read size and flags as well. - memReq = new MemReq(); - - memReq->asid = 0; - memReq->thread_num = 0; - memReq->data = new uint8_t[64]; - memReq->tc = tc; - memReq->cmd = Read; - memReq->reset(fetch_PC, cacheBlkSize, flags); + memReq = new Request(0, fetch_PC, cacheBlkSize, flags, + fetch_PC, cpu->readCpuId(), 0); // Translate the instruction request. - fault = cpu->translateInstReq(memReq); + fault = cpu->translateInstReq(memReq, thread); // Now do the timing access to see whether or not the instruction // exists within the cache. - if (icacheInterface && fault == NoFault) { -#if FULL_SYSTEM + if (fault == NoFault) { +#if 0 if (cpu->system->memctrl->badaddr(memReq->paddr) || memReq->flags & UNCACHEABLE) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " @@ -429,30 +485,21 @@ FrontEnd<Impl>::fetchCacheLine() } #endif - memReq->completionEvent = NULL; - - memReq->time = curTick; - fault = cpu->mem->read(memReq, cacheData); - - MemAccessResult res = icacheInterface->access(memReq); - - // If the cache missed then schedule an event to wake - // up this stage once the cache miss completes. - if (icacheInterface->doEvents() && res != MA_HIT) { - memReq->completionEvent = new ICacheCompletionEvent(memReq, this); - - status = IcacheMissStall; - - cacheBlkValid = false; - - DPRINTF(FE, "Cache miss.\n"); - } else { - DPRINTF(FE, "Cache hit.\n"); - - cacheBlkValid = true; - -// memcpy(cacheData, memReq->data, memReq->size); + // Build packet here. + PacketPtr data_pkt = new Packet(memReq, + Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(cacheData); + + if (!icachePort.sendTiming(data_pkt)) { + assert(retryPkt == NULL); + DPRINTF(Fetch, "Out of MSHRs!\n"); + status = IcacheWaitRetry; + retryPkt = data_pkt; + cacheBlocked = true; + return NoFault; } + + status = IcacheWaitResponse; } // Note that this will set the cache block PC a bit earlier than it should @@ -565,7 +612,7 @@ FrontEnd<Impl>::handleFault(Fault &fault) // instruction->setASID(tid); - instruction->setState(thread); + instruction->setThreadState(thread); instruction->traceData = NULL; @@ -614,8 +661,8 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC, } // Clear the icache miss if it's outstanding. - if (status == IcacheMissStall && icacheInterface) { - DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + if (status == IcacheWaitResponse) { + DPRINTF(FE, "Squashing outstanding Icache access.\n"); memReq = NULL; } @@ -652,20 +699,22 @@ FrontEnd<Impl>::getInst() template <class Impl> void -FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req) +FrontEnd<Impl>::processCacheCompletion(PacketPtr pkt) { DPRINTF(FE, "Processing cache completion\n"); // Do something here. - if (status != IcacheMissStall || - req != memReq || + if (status != IcacheWaitResponse || + pkt->req != memReq || switchedOut) { DPRINTF(FE, "Previous fetch was squashed.\n"); fetchIcacheSquashes++; + delete pkt->req; + delete pkt; return; } - status = IcacheMissComplete; + status = IcacheAccessComplete; /* if (checkStall(tid)) { fetchStatus[tid] = Blocked; @@ -677,6 +726,8 @@ FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req) // Reset the completion event to NULL. // memReq->completionEvent = NULL; + delete pkt->req; + delete pkt; memReq = NULL; } @@ -698,6 +749,27 @@ FrontEnd<Impl>::addFreeRegs(int num_freed) } template <class Impl> +void +FrontEnd<Impl>::recvRetry() +{ + assert(cacheBlocked); + if (retryPkt != NULL) { + assert(status == IcacheWaitRetry); + + if (icachePort.sendTiming(retryPkt)) { + status = IcacheWaitResponse; + retryPkt = NULL; + cacheBlocked = false; + } + } else { + // Access has been squashed since it was sent out. Just clear + // the cache being blocked. + cacheBlocked = false; + } + +} + +template <class Impl> bool FrontEnd<Impl>::updateStatus() { @@ -775,7 +847,7 @@ FrontEnd<Impl>::getInstFromCacheline() DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), inst_seq, cpu); - instruction->setState(thread); + instruction->setThreadState(thread); DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", inst_seq, instruction->readPC(), @@ -899,24 +971,3 @@ FrontEnd<Impl>::dumpInsts() buff_it++; } } - -template <class Impl> -FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) - : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) -{ - this->setFlags(Event::AutoDelete); -} - -template <class Impl> -void -FrontEnd<Impl>::ICacheCompletionEvent::process() -{ - frontEnd->processCacheCompletion(req); -} - -template <class Impl> -const char * -FrontEnd<Impl>::ICacheCompletionEvent::description() -{ - return "ICache completion event"; -} diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh index bb81f60c8..d836ceebd 100644 --- a/src/cpu/ozone/lw_back_end.hh +++ b/src/cpu/ozone/lw_back_end.hh @@ -51,6 +51,8 @@ class ThreadContext; template <class Impl> class OzoneThreadState; +class Port; + template <class Impl> class LWBackEnd { @@ -60,9 +62,9 @@ class LWBackEnd typedef typename Impl::Params Params; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; typedef typename Impl::FrontEnd FrontEnd; - typedef typename Impl::FullCPU::CommStruct CommStruct; + typedef typename Impl::OzoneCPU::CommStruct CommStruct; struct SizeStruct { int size; @@ -95,35 +97,13 @@ class LWBackEnd const char *description(); }; - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - LWBackEnd *be; - - bool dcacheMiss; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be); - - /** Processes writeback event. */ - virtual void process(); - /** Returns the description of the writeback event. */ - virtual const char *description(); - - void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); } - }; - LWBackEnd(Params *params); std::string name() const; void regStats(); - void setCPU(FullCPU *cpu_ptr); + void setCPU(OzoneCPU *cpu_ptr); void setFrontEnd(FrontEnd *front_end_ptr) { frontEnd = front_end_ptr; } @@ -136,6 +116,8 @@ class LWBackEnd void setCommBuffer(TimeBuffer<CommStruct> *_comm); + Port *getDcachePort() { return LSQ.getDcachePort(); } + void tick(); void squash(); void generateTCEvent() { tcSquash = true; } @@ -239,7 +221,7 @@ class LWBackEnd void updateComInstStats(DynInstPtr &inst); public: - FullCPU *cpu; + OzoneCPU *cpu; FrontEnd *frontEnd; @@ -273,24 +255,6 @@ class LWBackEnd RenameTable<Impl> renameTable; private: - class DCacheCompletionEvent : public Event - { - private: - LWBackEnd *be; - - public: - DCacheCompletionEvent(LWBackEnd *_be); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; - - DCacheCompletionEvent cacheCompletionEvent; - - MemInterface *dcacheInterface; - // General back end width. Used if the more specific isn't given. int width; diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index ed406d5a3..a4f1d805e 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -28,9 +28,14 @@ * Authors: Kevin Lim */ -#include "cpu/checker/cpu.hh" +#include "config/use_checker.hh" + #include "cpu/ozone/lw_back_end.hh" -#include "encumbered/cpu/full/op_class.hh" +#include "cpu/op_class.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif template <class Impl> void @@ -134,86 +139,11 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst) assert(found_inst); } -template<class Impl> -LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - LWBackEnd<Impl> *_be) - : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false) -{ - this->setFlags(Event::AutoDelete); -} - -template<class Impl> -void -LWBackEnd<Impl>::LdWritebackEvent::process() -{ - DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); -// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - -// iewStage->wakeCPU(); - - if (be->isSwitchedOut()) - return; - - if (dcacheMiss) { - be->removeDcacheMiss(inst); - } - - if (inst->isSquashed()) { - inst = NULL; - return; - } - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Execute again to copy data to proper place. - inst->completeAcc(); - } - - // Need to insert instruction into queue to commit - be->instToCommit(inst); - - //wroteToTimeBuffer = true; -// iewStage->activityThisCycle(); - - inst = NULL; -} - -template<class Impl> -const char * -LWBackEnd<Impl>::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - - -template <class Impl> -LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ -} - -template <class Impl> -void -LWBackEnd<Impl>::DCacheCompletionEvent::process() -{ -} - -template <class Impl> -const char * -LWBackEnd<Impl>::DCacheCompletionEvent::description() -{ - return "Cache completion event"; -} - template <class Impl> LWBackEnd<Impl>::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), tcSquash(false), cacheCompletionEvent(this), - dcacheInterface(params->dcacheInterface), width(params->backEndWidth), - exactFullStall(true) + trapSquash(false), tcSquash(false), + width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; @@ -239,6 +169,7 @@ LWBackEnd<Impl>::LWBackEnd(Params *params) LSQ.init(params, params->LQEntries, params->SQEntries, 0); dispatchStatus = Running; + commitStatus = Running; } template <class Impl> @@ -569,7 +500,7 @@ LWBackEnd<Impl>::regStats() template <class Impl> void -LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr) +LWBackEnd<Impl>::setCPU(OzoneCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); @@ -626,6 +557,7 @@ LWBackEnd<Impl>::checkInterrupts() } } } +#endif template <class Impl> void @@ -639,7 +571,7 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency) // Consider holding onto the trap and waiting until the trap event // happens for this to be executed. - fault->invoke(thread->getTCProxy()); + fault->invoke(thread->getTC()); // Exit state update mode to avoid accidental updating. thread->inSyscall = false; @@ -649,7 +581,6 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency) // Generate trap squash event. generateTrapEvent(latency); } -#endif template <class Impl> void @@ -671,6 +602,7 @@ LWBackEnd<Impl>::tick() #if FULL_SYSTEM checkInterrupts(); +#endif if (trapSquash) { assert(!tcSquash); @@ -678,7 +610,6 @@ LWBackEnd<Impl>::tick() } else if (tcSquash) { squashFromTC(); } -#endif if (dispatchStatus != Blocked) { dispatchInsts(); @@ -929,11 +860,6 @@ LWBackEnd<Impl>::executeInsts() // at the commit stage. if (inst->isMemRef() && (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { - if (dcacheInterface->isBlocked()) { - // Should I move the instruction aside? - DPRINTF(BE, "Execute: dcache is blocked\n"); - break; - } DPRINTF(BE, "Execute: Initiating access for memory " "reference.\n"); @@ -941,7 +867,7 @@ LWBackEnd<Impl>::executeInsts() LSQ.executeLoad(inst); } else if (inst->isStore()) { LSQ.executeStore(inst); - if (inst->req && !(inst->req->flags & LOCKED)) { + if (inst->req && !(inst->req->getFlags() & LOCKED)) { inst->setExecuted(); instToCommit(inst); @@ -1078,7 +1004,7 @@ LWBackEnd<Impl>::commitInst(int inst_num) thread->setPC(inst->readPC()); thread->setNextPC(inst->readNextPC()); - inst->reachedCommit = true; + inst->setAtCommit(); // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. @@ -1183,9 +1109,11 @@ LWBackEnd<Impl>::commitInst(int inst_num) // Use checker prior to updating anything due to traps or PC // based events. +#if USE_CHECKER if (checker) { - checker->tick(inst); + checker->verify(inst); } +#endif if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", @@ -1200,19 +1128,18 @@ LWBackEnd<Impl>::commitInst(int inst_num) } else if (inst_num != 0) { DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); return false; - } else if (checker && inst->isStore()) { - checker->tick(inst); } +#if USE_CHECKER + else if (checker && inst->isStore()) { + checker->verify(inst); + } +#endif thread->setInst( static_cast<TheISA::MachInst>(inst->staticInst->machInst)); -#if FULL_SYSTEM + handleFault(inst_fault); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM } int freed_regs = 0; @@ -1259,7 +1186,7 @@ LWBackEnd<Impl>::commitInst(int inst_num) assert(!thread->inSyscall && !thread->trapPending); oldpc = thread->readPC(); cpu->system->pcEventQueue.service( - thread->getTCProxy()); + thread->getTC()); count++; } while (oldpc != thread->readPC()); if (count > 1) { @@ -1346,7 +1273,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn) (*insts_it)->setCanCommit(); - (*insts_it)->removeInROB(); + (*insts_it)->clearInROB(); for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); @@ -1497,10 +1424,10 @@ LWBackEnd<Impl>::doSwitchOut() template <class Impl> void -LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc) +LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc) { switchedOut = false; - xcSquash = false; + tcSquash = false; trapSquash = false; numInsts = 0; @@ -1510,7 +1437,7 @@ LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc) switchedOut = false; dispatchStatus = Running; commitStatus = Running; - LSQ.takeOverFrom(old_xc); + LSQ.takeOverFrom(old_tc); } template <class Impl> diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index b2924db54..2eb09d01a 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -47,7 +47,7 @@ #include "sim/debug.hh" #include "sim/sim_object.hh" -//class PageTable; +class MemObject; /** * Class that implements the actual LQ and SQ for each specific thread. @@ -64,7 +64,7 @@ template <class Impl> class OzoneLWLSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; typedef typename Impl::BackEnd BackEnd; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::IssueStruct IssueStruct; @@ -73,35 +73,6 @@ class OzoneLWLSQ { typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt; - private: - class StoreCompletionEvent : public Event { - public: - /** Constructs a store completion event. */ - StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, - Event *wb_event, OzoneLWLSQ *lsq_ptr); - - /** Processes the store completion event. */ - void process(); - - /** Returns the description of this event. */ - const char *description(); - - private: - /** The store index of the store being written back. */ - DynInstPtr inst; - - BackEnd *be; - /** The writeback event for the store. Needed for store - * conditionals. - */ - public: - Event *wbEvent; - bool miss; - private: - /** The pointer to the LSQ unit that issued the store. */ - OzoneLWLSQ<Impl> *lsqPtr; - }; - public: /** Constructs an LSQ unit. init() must be called prior to use. */ OzoneLWLSQ(); @@ -114,15 +85,13 @@ class OzoneLWLSQ { std::string name() const; /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(OzoneCPU *cpu_ptr); /** Sets the back-end stage pointer. */ void setBE(BackEnd *be_ptr) { be = be_ptr; } - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); + Port *getDcachePort() { return &dcachePort; } /** Ticks the LSQ unit, which in this case only resets the number of * used cache ports. @@ -155,6 +124,10 @@ class OzoneLWLSQ { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ + void completeDataAccess(PacketPtr pkt); + // @todo: Include stats in the LSQ unit. //void regStats(); @@ -231,8 +204,8 @@ class OzoneLWLSQ { /** Returns if the LSQ unit will writeback on this cycle. */ bool willWB() { return storeQueue.back().canWB && - !storeQueue.back().completed/* && - !dcacheInterface->isBlocked()*/; } + !storeQueue.back().completed && + !isStoreBlocked; } void switchOut(); @@ -243,12 +216,21 @@ class OzoneLWLSQ { bool switchedOut; private: + /** Writes back the instruction, sending it to IEW. */ + void writeback(DynInstPtr &inst, PacketPtr pkt); + + /** Handles completing the send of a store to memory. */ + void storePostSend(Packet *pkt, DynInstPtr &inst); + /** Completes the store at the specified index. */ void completeStore(int store_idx); + /** Handles doing the retry. */ + void recvRetry(); + private: /** Pointer to the CPU. */ - FullCPU *cpu; + OzoneCPU *cpu; /** Pointer to the back-end stage. */ BackEnd *be; @@ -258,11 +240,11 @@ class OzoneLWLSQ { class DcachePort : public Port { protected: - FullCPU *cpu; + OzoneLWLSQ *lsq; public: - DcachePort(const std::string &_name, FullCPU *_cpu) - : Port(_name), cpu(_cpu) + DcachePort(OzoneLWLSQ *_lsq) + : lsq(_lsq) { } protected: @@ -281,12 +263,9 @@ class OzoneLWLSQ { virtual void recvRetry(); }; - /** Pointer to the D-cache. */ + /** D-cache port. */ DcachePort dcachePort; - /** Pointer to the page table. */ -// PageTable *pTable; - public: struct SQEntry { /** Constructs an empty store queue entry. */ @@ -319,6 +298,48 @@ class OzoneLWLSQ { typename std::list<DynInstPtr>::iterator lqIt; }; + /** Derived class to hold any sender state the LSQ needs. */ + class LSQSenderState : public Packet::SenderState + { + public: + /** Default constructor. */ + LSQSenderState() + : noWB(false) + { } + + /** Instruction who initiated the access to memory. */ + DynInstPtr inst; + /** Whether or not it is a load. */ + bool isLoad; + /** The LQ/SQ index of the instruction. */ + int idx; + /** Whether or not the instruction will need to writeback. */ + bool noWB; + }; + + /** Writeback event, specifically for when stores forward data to loads. */ + class WritebackEvent : public Event { + public: + /** Constructs a writeback event. */ + WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, OzoneLWLSQ *lsq_ptr); + + /** Processes the writeback event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** Instruction whose results are being written back. */ + DynInstPtr inst; + + /** The packet that would have been sent to memory. */ + PacketPtr pkt; + + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ<Impl> *lsqPtr; + }; + enum Status { Running, Idle, @@ -395,6 +416,12 @@ class OzoneLWLSQ { /** The index of the above store. */ LQIt stallingLoad; + /** The packet that needs to be retried. */ + PacketPtr retryPkt; + + /** Whehter or not a store is blocked due to the memory system. */ + bool isStoreBlocked; + /** Whether or not a load is blocked due to the memory system. It is * cleared when this value is checked via loadBlocked(). */ @@ -470,7 +497,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) // too). // @todo: Fix uncached accesses. if (req->getFlags() & UNCACHEABLE && - (inst != loadQueue.back() || !inst->reachedCommit)) { + (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", inst->seqNum); @@ -532,17 +559,19 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " "[sn:%lli] addr %#x, data %#x\n", - (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData)); -/* - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(inst, - be); + (*sq_it).inst->seqNum, inst->seqNum, req->getVaddr(), + *(inst->memData)); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); // We'll say this has a 1 cycle load-store forwarding latency // for now. - // FIXME - Need to make this a parameter. + // @todo: Need to make this a parameter. wb->schedule(curTick); -*/ + // Should keep track of stat for forwarded data return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || @@ -575,7 +604,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " "Store [sn:%lli] to load addr %#x\n", - (*sq_it).inst->seqNum, req->vaddr); + (*sq_it).inst->seqNum, req->getVaddr()); return NoFault; } @@ -597,6 +626,12 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(inst->memData); + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = inst; + data_pkt->senderState = state; + // if we have a cache, do cache access too if (!dcachePort.sendTiming(data_pkt)) { // There's an older load that's already going to squash. @@ -613,6 +648,10 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) return NoFault; } + if (req->getFlags() & LOCKED) { + cpu->lockFlag = true; + } + if (data_pkt->result != Packet::Success) { DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n"); DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 05db3028a..88e9c218f 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -28,64 +28,112 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" #include "cpu/checker/cpu.hh" -template <class Impl> -OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, - BackEnd *_be, - Event *wb_event, - OzoneLWLSQ<Impl> *lsq_ptr) - : Event(&mainEventQueue), - inst(_inst), - be(_be), - wbEvent(wb_event), - miss(false), - lsqPtr(lsq_ptr) +template<class Impl> +OzoneLWLSQ<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, + OzoneLWLSQ *lsq_ptr) + : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); } -template <class Impl> +template<class Impl> void -OzoneLWLSQ<Impl>::StoreCompletionEvent::process() +OzoneLWLSQ<Impl>::WritebackEvent::process() +{ + if (!lsqPtr->isSwitchedOut()) { + lsqPtr->writeback(inst, pkt); + } + delete pkt; +} + +template<class Impl> +const char * +OzoneLWLSQ<Impl>::WritebackEvent::description() { - DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", - inst->seqNum); + return "Store writeback event"; +} - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); +template <class Impl> +Tick +OzoneLWLSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} -// lsqPtr->cpu->wakeCPU(); - if (lsqPtr->isSwitchedOut()) { - if (wbEvent) - delete wbEvent; +template <class Impl> +void +OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} +template <class Impl> +void +OzoneLWLSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) return; - } - if (wbEvent) { - wbEvent->process(); - delete wbEvent; - } + panic("O3CPU doesn't expect recvStatusChange callback!"); +} - lsqPtr->completeStore(inst->sqIdx); - if (miss) - be->removeDcacheMiss(inst); +template <class Impl> +bool +OzoneLWLSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->completeDataAccess(pkt); + return true; } template <class Impl> -const char * -OzoneLWLSQ<Impl>::StoreCompletionEvent::description() +void +OzoneLWLSQ<Impl>::DcachePort::recvRetry() +{ + lsq->recvRetry(); +} + +template<class Impl> +void +OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt) { - return "LSQ store completion event"; + LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); + DynInstPtr inst = state->inst; + DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum); + DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + + if (isSwitchedOut() || inst->isSquashed()) { + delete state; + delete pkt; + return; + } else { + if (!state->noWB) { + writeback(inst, pkt); + } + + if (inst->isStore()) { + completeStore(state->idx); + } + } + + delete state; + delete pkt; } template <class Impl> OzoneLWLSQ<Impl>::OzoneLWLSQ() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), - loadBlockedHandled(false) + : switchedOut(false), dcachePort(this), loads(0), stores(0), + storesToWB(0), stalled(false), isStoreBlocked(false), + isLoadBlocked(false), loadBlockedHandled(false) { } @@ -106,11 +154,11 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries, SQIndices.push(i); } + mem = params->mem; + usedPorts = 0; cachePorts = params->cachePorts; - dcacheInterface = params->dcacheInterface; - loadFaultInst = storeFaultInst = memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -125,6 +173,20 @@ OzoneLWLSQ<Impl>::name() const template<class Impl> void +OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr) +{ + cpu = cpu_ptr; + dcachePort.setName(this->name() + "-dport"); + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setDcachePort(&dcachePort); + } +#endif +} + +template<class Impl> +void OzoneLWLSQ<Impl>::clearLQ() { loadQueue.clear(); @@ -481,6 +543,12 @@ OzoneLWLSQ<Impl>::writebackStores() (*sq_it).canWB && usedPorts < cachePorts) { + if (isStoreBlocked) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + DynInstPtr inst = (*sq_it).inst; if ((*sq_it).size == 0 && !(*sq_it).completed) { @@ -495,48 +563,64 @@ OzoneLWLSQ<Impl>::writebackStores() continue; } - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } - ++usedPorts; assert((*sq_it).req); assert(!(*sq_it).committed); + Request *req = (*sq_it).req; (*sq_it).committed = true; - MemReqPtr req = (*sq_it).req; + assert(!inst->memData); + inst->memData = new uint8_t[64]; + memcpy(inst->memData, (uint8_t *)&(*sq_it).data, + req->getSize()); - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; + PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - if (!(req->flags & LOCKED)) { - (*sq_it).inst->setCompleted(); - if (cpu->checker) { - cpu->checker->tick((*sq_it).inst); + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = inst->sqIdx; + state->inst = inst; + data_pkt->senderState = state; + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + (*sq_it).inst->readPC(), + req->getPaddr(), *(inst->memData), + inst->seqNum); + + // @todo: Remove this SC hack once the memory system handles it. + if (req->getFlags() & LOCKED) { + if (req->getFlags() & UNCACHEABLE) { + req->setScResult(2); + } else { + if (cpu->lockFlag) { + req->setScResult(1); + } else { + req->setScResult(0); + // Hack: Instantly complete this store. + completeDataAccess(data_pkt); + --sq_it; + continue; + } } + } else { + // Non-store conditionals do not need a writeback. + state->noWB = true; } + if (!dcachePort.sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + isStoreBlocked = true; + assert(retryPkt == NULL); + retryPkt = data_pkt; + } else { + storePostSend(data_pkt, inst); + --sq_it; + } +/* DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", inst->sqIdx,inst->readPC(), @@ -606,6 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores() } else { panic("Must HAVE DCACHE!!!!!\n"); } +*/ } // Not sure this should set it to 0. @@ -685,10 +770,6 @@ OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num) SQIndices.push((*sq_it).inst->sqIdx); (*sq_it).inst = NULL; (*sq_it).canWB = 0; - - if ((*sq_it).req) { - assert(!(*sq_it).req->completionEvent); - } (*sq_it).req = NULL; --stores; storeQueue.erase(sq_it++); @@ -734,6 +815,72 @@ OzoneLWLSQ<Impl>::dumpInsts() template <class Impl> void +OzoneLWLSQ<Impl>::storePostSend(Packet *pkt, DynInstPtr &inst) +{ + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + if (!inst->isStoreConditional()) { + // The store is basically completed at this time. This + // only works so long as the checker doesn't try to + // verify the value in memory for stores. + inst->setCompleted(); +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->verify(inst); + } +#endif + } + + if (pkt->result != Packet::Success) { + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + + DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", + inst->seqNum); + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // @todo: Increment stat here. + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n"); + + DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", + inst->seqNum); + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) +{ + // Squashed instructions do not need to complete their access. + if (inst->isSquashed()) { + assert(!inst->isStore()); + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Complete access to copy data to proper place. + inst->completeAcc(pkt); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); +} + +template <class Impl> +void OzoneLWLSQ<Impl>::completeStore(int store_idx) { SQHashIt sq_hash_it = SQItHash.find(store_idx); @@ -766,9 +913,18 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx) --stores; inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(inst); + cpu->checker->verify(inst); } +#endif +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::recvRetry() +{ + panic("Unimplemented!"); } template <class Impl> @@ -777,68 +933,6 @@ OzoneLWLSQ<Impl>::switchOut() { assert(storesToWB == 0); switchedOut = true; - SQIt sq_it = --(storeQueue.end()); - while (storesToWB > 0 && - sq_it != storeQueue.end() && - (*sq_it).inst && - (*sq_it).canWB) { - - DynInstPtr inst = (*sq_it).inst; - - if ((*sq_it).size == 0 && !(*sq_it).completed) { - sq_it--; - continue; - } - - // Store conditionals don't complete until *after* they have written - // back. If it's here and not yet sent to memory, then don't bother - // as it's not part of committed state. - if (inst->isDataPrefetch() || (*sq_it).committed) { - sq_it--; - continue; - } else if ((*sq_it).req->flags & LOCKED) { - sq_it--; - assert(!(*sq_it).canWB || - ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); - continue; - } - - assert((*sq_it).req); - assert(!(*sq_it).committed); - - MemReqPtr req = (*sq_it).req; - (*sq_it).committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); - - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - } // Clear the queue to free up resources storeQueue.clear(); diff --git a/src/cpu/ozone/ozone_base_dyn_inst.cc b/src/cpu/ozone/ozone_base_dyn_inst.cc new file mode 100644 index 000000000..5a3a69dff --- /dev/null +++ b/src/cpu/ozone/ozone_base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +// Explicit instantiation +template class BaseDynInst<OzoneImpl>; + +template <> +int +BaseDynInst<OzoneImpl>::instcount = 0; diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh index e977d06a9..503675738 100644 --- a/src/cpu/ozone/ozone_impl.hh +++ b/src/cpu/ozone/ozone_impl.hh @@ -50,7 +50,7 @@ class OzoneDynInst; struct OzoneImpl { typedef SimpleParams Params; typedef OzoneCPU<OzoneImpl> OzoneCPU; - typedef OzoneCPU FullCPU; + typedef OzoneCPU CPUType; // Would like to put these into their own area. // typedef NullPredictor BranchPred; diff --git a/src/cpu/ozone/rename_table.cc b/src/cpu/ozone/rename_table.cc index b0a36afbe..a44054b6e 100644 --- a/src/cpu/ozone/rename_table.cc +++ b/src/cpu/ozone/rename_table.cc @@ -30,7 +30,7 @@ #include "cpu/ozone/rename_table_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class RenameTable<OzoneImpl>; -template class RenameTable<SimpleImpl>; +//template class RenameTable<SimpleImpl>; diff --git a/src/cpu/ozone/simple_base_dyn_inst.cc b/src/cpu/ozone/simple_base_dyn_inst.cc new file mode 100644 index 000000000..fdaeaf57e --- /dev/null +++ b/src/cpu/ozone/simple_base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +// Explicit instantiation +template class BaseDynInst<SimpleImpl>; + +template <> +int +BaseDynInst<SimpleImpl>::instcount = 0; diff --git a/src/cpu/ozone/simple_cpu_builder.cc b/src/cpu/ozone/simple_cpu_builder.cc new file mode 100644 index 000000000..baaf7c708 --- /dev/null +++ b/src/cpu/ozone/simple_cpu_builder.cc @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include <string> + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/cpu_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/simple_params.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +template +class OzoneCPU<SimpleImpl>; + +class SimpleOzoneCPU : public OzoneCPU<SimpleImpl> +{ + public: + SimpleOzoneCPU(SimpleParams *p) + : OzoneCPU<SimpleImpl>(p) + { } +}; + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + Param<int> clock; + Param<int> numThreads; + +#if FULL_SYSTEM +SimObjectParam<System *> system; +Param<int> cpu_id; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +#else +SimObjectVectorParam<Process *> workload; +//SimObjectParam<PageTable *> page_table; +#endif // FULL_SYSTEM + +SimObjectParam<FunctionalMemory *> mem; + +SimObjectParam<BaseCPU *> checker; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> cachePorts; +Param<unsigned> width; +Param<unsigned> frontEndWidth; +Param<unsigned> backEndWidth; +Param<unsigned> backEndSquashLatency; +Param<unsigned> backEndLatency; +Param<unsigned> maxInstBufferSize; +Param<unsigned> numPhysicalRegs; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +Param<std::string> predType; +Param<unsigned> localPredictorSize; +Param<unsigned> localCtrBits; +Param<unsigned> localHistoryTableSize; +Param<unsigned> localHistoryBits; +Param<unsigned> globalPredictorSize; +Param<unsigned> globalCtrBits; +Param<unsigned> globalHistoryBits; +Param<unsigned> choicePredictorSize; +Param<unsigned> choiceCtrBits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<bool> decoupledFrontEnd; +Param<int> dispatchWidth; +Param<int> wbWidth; + +Param<unsigned> smtNumFetchingThreads; +Param<std::string> smtFetchPolicy; +Param<std::string> smtLSQPolicy; +Param<unsigned> smtLSQThreshold; +Param<std::string> smtIQPolicy; +Param<unsigned> smtIQThreshold; +Param<std::string> smtROBPolicy; +Param<unsigned> smtROBThreshold; +Param<std::string> smtCommitPolicy; + +Param<unsigned> instShiftAmt; + +Param<bool> defer_registration; + +Param<bool> function_trace; +Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +CREATE_SIM_OBJECT(SimpleOzoneCPU) +{ + SimpleOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new SimpleOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) + diff --git a/src/cpu/ozone/simple_params.hh b/src/cpu/ozone/simple_params.hh index 13eb05e77..11cee716f 100644 --- a/src/cpu/ozone/simple_params.hh +++ b/src/cpu/ozone/simple_params.hh @@ -37,8 +37,7 @@ class AlphaDTB; class AlphaITB; class FUPool; -class FunctionalMemory; -class MemInterface; +class MemObject; class PageTable; class Process; class System; @@ -62,13 +61,13 @@ class SimpleParams : public BaseCPU::Params //Page Table PageTable *pTable; - FunctionalMemory *mem; + MemObject *mem; // // Caches // - MemInterface *icacheInterface; - MemInterface *dcacheInterface; +// MemInterface *icacheInterface; +// MemInterface *dcacheInterface; unsigned cachePorts; unsigned width; diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh index 299878c29..ef4b1429d 100644 --- a/src/cpu/ozone/thread_state.hh +++ b/src/cpu/ozone/thread_state.hh @@ -58,30 +58,23 @@ class FunctionalMemory; template <class Impl> struct OzoneThreadState : public ThreadState { typedef typename ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::CPUType CPUType; typedef TheISA::MiscReg MiscReg; #if FULL_SYSTEM - OzoneThreadState(FullCPU *_cpu, int _thread_num) + OzoneThreadState(CPUType *_cpu, int _thread_num) : ThreadState(-1, _thread_num), - inSyscall(0), trapPending(0) + intrflag(0), inSyscall(0), trapPending(0) { - memset(®s, 0, sizeof(TheISA::RegFile)); + miscRegFile.clear(); } #else - OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, NULL, _process, _asid), + OzoneThreadState(CPUType *_cpu, int _thread_num, Process *_process, + int _asid, MemObject *mem) + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { - memset(®s, 0, sizeof(TheISA::RegFile)); - } - - OzoneThreadState(FullCPU *_cpu, int _thread_num, - int _asid) - : ThreadState(-1, _thread_num, NULL, NULL, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { - memset(®s, 0, sizeof(TheISA::RegFile)); + miscRegFile.clear(); } #endif @@ -91,9 +84,11 @@ struct OzoneThreadState : public ThreadState { Addr nextPC; - TheISA::RegFile regs; + TheISA::MiscRegFile miscRegFile; + + int intrflag; - typename Impl::FullCPU *cpu; + typename Impl::CPUType *cpu; bool inSyscall; @@ -103,54 +98,24 @@ struct OzoneThreadState : public ThreadState { ThreadContext *getTC() { return tc; } -#if !FULL_SYSTEM - Fault translateInstReq(Request *req) - { - return process->pTable->translate(req); - } - Fault translateDataReadReq(Request *req) - { - return process->pTable->translate(req); - } - Fault translateDataWriteReq(Request *req) - { - return process->pTable->translate(req); - } -#else - Fault translateInstReq(Request *req) - { - return cpu->itb->translate(req); - } - - Fault translateDataReadReq(Request *req) - { - return cpu->dtb->translate(req, false); - } - - Fault translateDataWriteReq(Request *req) - { - return cpu->dtb->translate(req, true); - } -#endif - MiscReg readMiscReg(int misc_reg) { - return regs.readMiscReg(misc_reg); + return miscRegFile.readReg(misc_reg); } MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { - return regs.readMiscRegWithEffect(misc_reg, fault, tc); + return miscRegFile.readRegWithEffect(misc_reg, fault, tc); } Fault setMiscReg(int misc_reg, const MiscReg &val) { - return regs.setMiscReg(misc_reg, val); + return miscRegFile.setReg(misc_reg, val); } Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) { - return regs.setMiscRegWithEffect(misc_reg, val, tc); + return miscRegFile.setRegWithEffect(misc_reg, val, tc); } uint64_t readPC() diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 071193f02..1752b2b5b 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -33,6 +33,7 @@ #include "cpu/simple/atomic.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -55,18 +56,28 @@ AtomicSimpleCPU::TickEvent::description() return "AtomicSimpleCPU tick event"; } +Port * +AtomicSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void AtomicSimpleCPU::init() { //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); +// Port *mem_dport = mem->getPort(""); +// dcachePort.setPeer(mem_dport); +// mem_dport->setPeer(&dcachePort); - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); +// Port *mem_iport = mem->getPort(""); +// icachePort.setPeer(mem_iport); +// mem_iport->setPeer(&icachePort); BaseCPU::init(); #if FULL_SYSTEM @@ -124,15 +135,18 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) // @todo fix me and get the real cpu id & thread number!!! ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } @@ -145,8 +159,8 @@ AtomicSimpleCPU::~AtomicSimpleCPU() void AtomicSimpleCPU::serialize(ostream &os) { - BaseSimpleCPU::serialize(os); SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); } @@ -154,21 +168,25 @@ AtomicSimpleCPU::serialize(ostream &os) void AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - BaseSimpleCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); + BaseSimpleCPU::unserialize(cp, section); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } void -AtomicSimpleCPU::switchOut(Sampler *s) +AtomicSimpleCPU::resume() { - sampler = s; - if (status() == Running) { - _status = SwitchedOut; + assert(system->getMemoryMode() == System::Atomic); + changeState(SimObject::Running); +} - tickEvent.squash(); - } - sampler->signalSwitched(); +void +AtomicSimpleCPU::switchOut() +{ + assert(status() == Running || status() == Idle); + _status = SwitchedOut; + + tickEvent.squash(); } @@ -410,15 +428,14 @@ AtomicSimpleCPU::tick() postExecute(); if (simulate_stalls) { - // This calculation assumes that the icache and dcache - // access latencies are always a multiple of the CPU's - // cycle time. If not, the next tick event may get - // scheduled at a non-integer multiple of the CPU - // cycle time. Tick icache_stall = icache_latency - cycles(1); Tick dcache_stall = dcache_access ? dcache_latency - cycles(1) : 0; - latency += icache_stall + dcache_stall; + Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); + if (cycles(stall_cycles) < (icache_stall + dcache_stall)) + latency += cycles(stall_cycles+1); + else + latency += cycles(stall_cycles); } } @@ -442,11 +459,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -474,11 +491,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -511,11 +528,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->width = width; params->simulate_stalls = simulate_stalls; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 7f4956da9..d59ca01aa 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -122,10 +122,13 @@ class AtomicSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - void switchOut(Sampler *s); + virtual void resume(); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); virtual void activateContext(int thread_num, int delay); diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index d94b0e079..af10e64d7 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Steve Reinhardt + * Korey Sewell */ #include "arch/utility.hh" @@ -40,7 +41,6 @@ #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/simple/base.hh" #include "cpu/simple_thread.hh" #include "cpu/smt.hh" @@ -55,10 +55,10 @@ #include "sim/sim_events.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "sim/system.hh" #if FULL_SYSTEM #include "base/remote_gdb.hh" -#include "sim/system.hh" #include "arch/tlb.hh" #include "arch/stacktrace.hh" #include "arch/vtophys.hh" @@ -358,8 +358,13 @@ Fault BaseSimpleCPU::setupFetchRequest(Request *req) { // set up memory request for instruction fetch +#if THE_ISA == ALPHA_ISA + DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p",thread->readPC(), + thread->readNextPC()); +#else DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",thread->readPC(), thread->readNextPC(),thread->readNextNPC()); +#endif req->setVirt(0, thread->readPC() & ~3, sizeof(MachInst), (FULL_SYSTEM && (thread->readPC() & 1)) ? PHYSICAL : 0, @@ -440,11 +445,7 @@ void BaseSimpleCPU::advancePC(Fault fault) { if (fault != NoFault) { -#if FULL_SYSTEM fault->invoke(tc); -#else // !FULL_SYSTEM - fatal("fault (%s) detected @ PC %08p", fault->name(), thread->readPC()); -#endif // FULL_SYSTEM } else { // go to the next instruction diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39bc86050..57cfa3c2c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -38,7 +38,6 @@ #include "cpu/base.hh" #include "cpu/simple_thread.hh" #include "cpu/pc_event.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "mem/port.hh" @@ -128,11 +127,6 @@ class BaseSimpleCPU : public BaseCPU // Static data storage TheISA::IntReg dataReg; - // Pointer to the sampler that is telling us to switchover. - // Used to signal the completion of the pipe drain and schedule - // the next switchover - Sampler *sampler; - StaticInstPtr curStaticInst; void checkForInterrupts(); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index c99db8fbf..d2c2c7c47 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -33,23 +33,25 @@ #include "cpu/simple/timing.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; +Port * +TimingSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void TimingSimpleCPU::init() { - //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); - - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); - BaseCPU::init(); #if FULL_SYSTEM for (int i = 0; i < threadContexts.size(); ++i) { @@ -88,6 +90,9 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; + drainEvent = NULL; + fetchEvent = NULL; + changeState(SimObject::Running); } @@ -98,25 +103,61 @@ TimingSimpleCPU::~TimingSimpleCPU() void TimingSimpleCPU::serialize(ostream &os) { - BaseSimpleCPU::serialize(os); SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); } void TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - BaseSimpleCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); + BaseSimpleCPU::unserialize(cp, section); +} + +unsigned int +TimingSimpleCPU::drain(Event *drain_event) +{ + // TimingSimpleCPU is ready to drain if it's not waiting for + // an access to complete. + if (status() == Idle || status() == Running || status() == SwitchedOut) { + changeState(SimObject::Drained); + return 0; + } else { + changeState(SimObject::Draining); + drainEvent = drain_event; + return 1; + } } void -TimingSimpleCPU::switchOut(Sampler *s) +TimingSimpleCPU::resume() { - sampler = s; - if (status() == Running) { - _status = SwitchedOut; + if (_status != SwitchedOut && _status != Idle) { + // Delete the old event if it existed. + if (fetchEvent) { + assert(!fetchEvent->scheduled()); + delete fetchEvent; + } + + fetchEvent = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); + fetchEvent->schedule(curTick); } - sampler->signalSwitched(); + + assert(system->getMemoryMode() == System::Timing); + changeState(SimObject::Running); +} + +void +TimingSimpleCPU::switchOut() +{ + assert(status() == Running || status() == Idle); + _status = SwitchedOut; + + // If we've been scheduled to resume but are then told to switch out, + // we'll need to cancel it. + if (fetchEvent && fetchEvent->scheduled()) + fetchEvent->deschedule(); } @@ -148,9 +189,9 @@ TimingSimpleCPU::activateContext(int thread_num, int delay) notIdleFraction++; _status = Running; // kick things off by initiating the fetch of the next instruction - Event *e = - new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true); - e->schedule(curTick + cycles(delay)); + fetchEvent = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); + fetchEvent->schedule(curTick + cycles(delay)); } @@ -176,7 +217,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { // need to fill in CPU & thread IDs here Request *data_read_req = new Request(); - + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { @@ -257,6 +298,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { // need to fill in CPU & thread IDs here Request *data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); // translate to physical address @@ -340,6 +382,7 @@ TimingSimpleCPU::fetch() // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); @@ -383,11 +426,17 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) // instruction assert(pkt->result == Packet::Success); assert(_status == IcacheWaitResponse); + _status = Running; delete pkt->req; delete pkt; + if (getState() == SimObject::Draining) { + completeDrain(); + return; + } + preExecute(); if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) { // load or store: just send to dcache @@ -440,6 +489,15 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; + if (getState() == SimObject::Draining) { + completeDrain(); + + delete pkt->req; + delete pkt; + + return; + } + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); delete pkt->req; @@ -450,6 +508,13 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) } +void +TimingSimpleCPU::completeDrain() +{ + DPRINTF(Config, "Done draining\n"); + changeState(SimObject::Drained); + drainEvent->process(); +} bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) @@ -484,11 +549,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -516,11 +581,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -551,11 +616,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index ab0b2d2ca..ac36e5c99 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -64,6 +64,10 @@ class TimingSimpleCPU : public BaseSimpleCPU Status status() const { return _status; } + Event *drainEvent; + + Event *fetchEvent; + private: class CpuPort : public Port @@ -128,10 +132,15 @@ class TimingSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - void switchOut(Sampler *s); + virtual unsigned int drain(Event *drain_event); + virtual void resume(); + + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); virtual void activateContext(int thread_num, int delay); @@ -147,6 +156,8 @@ class TimingSimpleCPU : public BaseSimpleCPU void completeIfetch(Packet *); void completeDataAccess(Packet *); void advanceInst(Fault fault); + private: + void completeDrain(); }; #endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 48383ca93..af1db2ff2 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -107,7 +107,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, #else SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject* memobj) - : ThreadState(-1, _thread_num, memobj, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, memobj), cpu(_cpu) { /* Use this port to for syscall emulation writes to memory. */ @@ -123,15 +123,19 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, tc = new ProxyThreadContext<SimpleThread>(this); } -SimpleThread::SimpleThread(RegFile *regFile) - : ThreadState(-1, -1, NULL, NULL, -1), cpu(NULL) +#endif + +SimpleThread::SimpleThread() +#if FULL_SYSTEM + : ThreadState(-1, -1) +#else + : ThreadState(-1, -1, NULL, -1, NULL) +#endif { - regs = *regFile; tc = new ProxyThreadContext<SimpleThread>(this); + regs.clear(); } -#endif - SimpleThread::~SimpleThread() { delete tc; @@ -147,13 +151,8 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) assert(process == oldContext->getProcessPtr()); #endif - // copy over functional state - _status = oldContext->status(); - copyArchRegs(oldContext); - cpuId = oldContext->readCpuId(); -#if !FULL_SYSTEM - funcExeInst = oldContext->readFuncExeInst(); -#else + copyState(oldContext); +#if FULL_SYSTEM EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); if (quiesce) { // Point the quiesce event's TC at this TC so that it wakes up @@ -171,42 +170,49 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) } void -SimpleThread::serialize(ostream &os) +SimpleThread::copyTC(ThreadContext *context) { - SERIALIZE_ENUM(_status); - regs.serialize(os); - // thread_num and cpu_id are deterministic from the config - SERIALIZE_SCALAR(funcExeInst); - SERIALIZE_SCALAR(inst); + copyState(context); #if FULL_SYSTEM - Tick quiesceEndTick = 0; - if (quiesceEvent->scheduled()) - quiesceEndTick = quiesceEvent->when(); - SERIALIZE_SCALAR(quiesceEndTick); - if (kernelStats) - kernelStats->serialize(os); + EndQuiesceEvent *quiesce = context->getQuiesceEvent(); + if (quiesce) { + quiesceEvent = quiesce; + } + Kernel::Statistics *stats = context->getKernelStats(); + if (stats) { + kernelStats = stats; + } #endif } +void +SimpleThread::copyState(ThreadContext *oldContext) +{ + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + cpuId = oldContext->readCpuId(); +#if !FULL_SYSTEM + funcExeInst = oldContext->readFuncExeInst(); +#endif +} + +void +SimpleThread::serialize(ostream &os) +{ + ThreadState::serialize(os); + regs.serialize(os); + // thread_num and cpu_id are deterministic from the config +} + void SimpleThread::unserialize(Checkpoint *cp, const std::string §ion) { - UNSERIALIZE_ENUM(_status); + ThreadState::unserialize(cp, section); regs.unserialize(cp, section); // thread_num and cpu_id are deterministic from the config - UNSERIALIZE_SCALAR(funcExeInst); - UNSERIALIZE_SCALAR(inst); - -#if FULL_SYSTEM - Tick quiesceEndTick; - UNSERIALIZE_SCALAR(quiesceEndTick); - if (quiesceEndTick) - quiesceEvent->schedule(quiesceEndTick); - if (kernelStats) - kernelStats->unserialize(cp, section); -#endif } #if FULL_SYSTEM diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index de65e9891..d36853db4 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -119,16 +119,20 @@ class SimpleThread : public ThreadState #else SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *memobj); - // Constructor to use SimpleThread to pass reg file around. Not - // used for anything else. - SimpleThread(RegFile *regFile); #endif + + SimpleThread(); + virtual ~SimpleThread(); virtual void takeOverFrom(ThreadContext *oldContext); void regStats(const std::string &name); + void copyTC(ThreadContext *context); + + void copyState(ThreadContext *oldContext); + void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string §ion); diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index bea52f510..ea1a65148 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -34,10 +34,12 @@ #include <bitset> #include <string> +#include "base/bitfield.hh" #include "base/hashmap.hh" #include "base/misc.hh" #include "base/refcnt.hh" #include "cpu/op_class.hh" +#include "cpu/o3/dyn_inst.hh" #include "sim/host.hh" #include "arch/isa_traits.hh" @@ -50,9 +52,6 @@ class DynInst; class Packet; template <class Impl> -class AlphaDynInst; - -template <class Impl> class OzoneDynInst; class CheckerCPU; @@ -411,16 +410,10 @@ class StaticInst : public StaticInstBase //This is defined as inline below. static StaticInstPtr decode(ExtMachInst mach_inst); - //MIPS Decoder Debug Functions - int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26 - int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21 - int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16 - int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11 - int getImm() { return (machInst & 0x0000FFFF); } //15...0 - int getFunction(){ return (machInst & 0x0000003F); }//5...0 - int getBranch(){ return (machInst & 0x0000FFFF); }//15...0 - int getJump(){ return (machInst & 0x03FFFFFF); }//5...0 - int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6 + /// Return opcode of machine instruction + uint32_t getOpcode() { return bits(machInst, 31, 26);} + + /// Return name of machine instruction std::string getName() { return mnemonic; } }; diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 48c8fa28d..e019e22bc 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -143,7 +143,7 @@ class ThreadContext virtual void suspend() = 0; /// Set the status to Unallocated. - virtual void deallocate() = 0; + virtual void deallocate(int delay = 0) = 0; /// Set the status to Halted. virtual void halt() = 0; @@ -245,10 +245,13 @@ class ThreadContext virtual void setSyscallReturn(SyscallReturn return_value) = 0; - virtual void syscall(int64_t callnum) = 0; - // Same with st cond failures. virtual Counter readFuncExeInst() = 0; + + // This function exits the thread context in the CPU and returns + // 1 if the CPU has no more active threads (meaning it's OK to exit); + // Used in syscall-emulation mode when a thread calls the exit syscall. + virtual int exit() { return 1; }; #endif virtual void changeRegFileContext(RegFile::ContextParam param, @@ -315,7 +318,7 @@ class ProxyThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(); } /// Set the status to Halted. void halt() { actualTC->halt(); } @@ -432,8 +435,6 @@ class ProxyThreadContext : public ThreadContext void setSyscallReturn(SyscallReturn return_value) { actualTC->setSyscallReturn(return_value); } - void syscall(int64_t callnum) { actualTC->syscall(callnum); } - Counter readFuncExeInst() { return actualTC->readFuncExeInst(); } #endif diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc index dcfa93c3e..6a96560f1 100644 --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -31,6 +31,12 @@ #include "base/output.hh" #include "cpu/profile.hh" #include "cpu/thread_state.hh" +#include "sim/serialize.hh" + +#if FULL_SYSTEM +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#endif #if FULL_SYSTEM ThreadState::ThreadState(int _cpuId, int _tid) @@ -38,8 +44,8 @@ ThreadState::ThreadState(int _cpuId, int _tid) profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL), funcExeInst(0), storeCondFailures(0) #else -ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem, - Process *_process, short _asid) +ThreadState::ThreadState(int _cpuId, int _tid, Process *_process, + short _asid, MemObject *mem) : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0), process(_process), asid(_asid), funcExeInst(0), storeCondFailures(0) @@ -49,6 +55,43 @@ ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem, numLoad = 0; } +void +ThreadState::serialize(std::ostream &os) +{ + SERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(funcExeInst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; + if (quiesceEvent->scheduled()) + quiesceEndTick = quiesceEvent->when(); + SERIALIZE_SCALAR(quiesceEndTick); + if (kernelStats) + kernelStats->serialize(os); +#endif +} + +void +ThreadState::unserialize(Checkpoint *cp, const std::string §ion) +{ + + UNSERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(funcExeInst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) + quiesceEvent->schedule(quiesceEndTick); + if (kernelStats) + kernelStats->unserialize(cp, section); +#endif +} + #if FULL_SYSTEM void diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index de9b2f14e..b03a2e2bb 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -49,6 +49,8 @@ namespace Kernel { }; #endif +class Checkpoint; + /** * Struct for holding general thread state that is needed across CPU * models. This includes things such as pointers to the process, @@ -61,10 +63,14 @@ struct ThreadState { #if FULL_SYSTEM ThreadState(int _cpuId, int _tid); #else - ThreadState(int _cpuId, int _tid, MemObject *mem, - Process *_process, short _asid); + ThreadState(int _cpuId, int _tid, Process *_process, + short _asid, MemObject *mem); #endif + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string §ion); + void setCpuId(int id) { cpuId = id; } int readCpuId() { return cpuId; } diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index 63435e87c..5ffc02d34 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -227,177 +227,143 @@ IdeController::setDmaComplete(IdeDisk *disk) // Read and write handling //// -void -IdeController::readConfig(int offset, uint8_t *data) +Tick +IdeController::readConfig(Packet *pkt) { - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 1) <= IDE_CTRL_CONF_END) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; + if (offset < PCI_DEVICE_SPECIFIC) + return PciDev::readConfig(pkt); + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); + pkt->allocate(); + + switch (pkt->getSize()) { + case sizeof(uint8_t): switch (offset) { case IDE_CTRL_CONF_DEV_TIMING: - *data = config_regs.sidetim; + pkt->set<uint8_t>(config_regs.sidetim); break; case IDE_CTRL_CONF_UDMA_CNTRL: - *data = config_regs.udmactl; + pkt->set<uint8_t>(config_regs.udmactl); break; case IDE_CTRL_CONF_PRIM_TIMING+1: - *data = htole(config_regs.idetim0) >> 8; + pkt->set<uint8_t>(htole(config_regs.idetim0) >> 8); break; case IDE_CTRL_CONF_SEC_TIMING+1: - *data = htole(config_regs.idetim1) >> 8; + pkt->set<uint8_t>(htole(config_regs.idetim1) >> 8); break; case IDE_CTRL_CONF_IDE_CONFIG: - *data = htole(config_regs.ideconfig) & 0xFF; + pkt->set<uint8_t>(htole(config_regs.ideconfig) & 0xFF); break; case IDE_CTRL_CONF_IDE_CONFIG+1: - *data = htole(config_regs.ideconfig) >> 8; + pkt->set<uint8_t>(htole(config_regs.ideconfig) >> 8); break; default: panic("Invalid PCI configuration read for size 1 at offset: %#x!\n", offset); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", - offset, (uint32_t)*data); -} - -void -IdeController::readConfig(int offset, uint16_t *data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 2) <= IDE_CTRL_CONF_END) { - + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", offset, + (uint32_t)pkt->get<uint8_t>()); + break; + case sizeof(uint16_t): switch (offset) { case IDE_CTRL_CONF_PRIM_TIMING: - *data = config_regs.idetim0; + pkt->set<uint16_t>(config_regs.idetim0); break; case IDE_CTRL_CONF_SEC_TIMING: - *data = config_regs.idetim1; + pkt->set<uint16_t>(config_regs.idetim1); break; case IDE_CTRL_CONF_UDMA_TIMING: - *data = config_regs.udmatim; + pkt->set<uint16_t>(config_regs.udmatim); break; case IDE_CTRL_CONF_IDE_CONFIG: - *data = config_regs.ideconfig; + pkt->set<uint16_t>(config_regs.ideconfig); break; default: panic("Invalid PCI configuration read for size 2 offset: %#x!\n", offset); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, + (uint32_t)pkt->get<uint16_t>()); + break; + case sizeof(uint32_t): + panic("No 32bit reads implemented for this device."); + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, + (uint32_t)pkt->get<uint32_t>()); + break; + default: + panic("invalid access size(?) for PCI configspace!\n"); } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, *data); -} + pkt->result = Packet::Success; + return configDelay; -void -IdeController::readConfig(int offset, uint32_t *data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, *data); } -void -IdeController::writeConfig(int offset, const uint8_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 1) <= IDE_CTRL_CONF_END) { - switch (offset) { - case IDE_CTRL_CONF_DEV_TIMING: - config_regs.sidetim = data; - break; - case IDE_CTRL_CONF_UDMA_CNTRL: - config_regs.udmactl = data; - break; - case IDE_CTRL_CONF_IDE_CONFIG: - config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | (data); - break; - case IDE_CTRL_CONF_IDE_CONFIG+1: - config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | data << 8; - break; - default: - panic("Invalid PCI configuration write for size 1 offset: %#x!\n", - offset); - } - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n", - offset, (uint32_t)data); -} - -void -IdeController::writeConfig(int offset, const uint16_t data) +Tick +IdeController::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 2) <= IDE_CTRL_CONF_END) { + PciDev::writeConfig(pkt); + } else { + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); - switch (offset) { - case IDE_CTRL_CONF_PRIM_TIMING: - config_regs.idetim0 = data; - break; - case IDE_CTRL_CONF_SEC_TIMING: - config_regs.idetim1 = data; + switch (pkt->getSize()) { + case sizeof(uint8_t): + switch (offset) { + case IDE_CTRL_CONF_DEV_TIMING: + config_regs.sidetim = pkt->get<uint8_t>(); + break; + case IDE_CTRL_CONF_UDMA_CNTRL: + config_regs.udmactl = pkt->get<uint8_t>(); + break; + case IDE_CTRL_CONF_IDE_CONFIG: + config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | + (pkt->get<uint8_t>()); + break; + case IDE_CTRL_CONF_IDE_CONFIG+1: + config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | + pkt->get<uint8_t>() << 8; + break; + default: + panic("Invalid PCI configuration write for size 1 offset: %#x!\n", + offset); + } + DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n", + offset, (uint32_t)pkt->get<uint8_t>()); break; - case IDE_CTRL_CONF_UDMA_TIMING: - config_regs.udmatim = data; + case sizeof(uint16_t): + switch (offset) { + case IDE_CTRL_CONF_PRIM_TIMING: + config_regs.idetim0 = pkt->get<uint16_t>(); + break; + case IDE_CTRL_CONF_SEC_TIMING: + config_regs.idetim1 = pkt->get<uint16_t>(); + break; + case IDE_CTRL_CONF_UDMA_TIMING: + config_regs.udmatim = pkt->get<uint16_t>(); + break; + case IDE_CTRL_CONF_IDE_CONFIG: + config_regs.ideconfig = pkt->get<uint16_t>(); + break; + default: + panic("Invalid PCI configuration write for size 2 offset: %#x!\n", + offset); + } + DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", + offset, (uint32_t)pkt->get<uint16_t>()); break; - case IDE_CTRL_CONF_IDE_CONFIG: - config_regs.ideconfig = data; + case sizeof(uint32_t): + panic("Write of unimplemented PCI config. register: %x\n", offset); break; default: - panic("Invalid PCI configuration write for size 2 offset: %#x!\n", - offset); + panic("invalid access size(?) for PCI configspace!\n"); } - - } else { - panic("Write of unimplemented PCI config. register: %x\n", offset); } - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", offset, data); - - /* Trap command register writes and enable IO/BM as appropriate. */ - if (offset == PCI_COMMAND) { - if (letoh(config.command) & PCI_CMD_IOSE) - io_enabled = true; - else - io_enabled = false; - - if (letoh(config.command) & PCI_CMD_BME) - bm_enabled = true; - else - bm_enabled = false; - } - -} - -void -IdeController::writeConfig(int offset, const uint32_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 4 data: %#x\n", offset, data); + /* Trap command register writes and enable IO/BM as appropriate as well as + * BARs. */ switch(offset) { case PCI0_BASE_ADDR0: if (BARAddrs[0] != 0) @@ -423,9 +389,24 @@ IdeController::writeConfig(int offset, const uint32_t data) if (BARAddrs[4] != 0) bmi_addr = BARAddrs[4]; break; + + case PCI_COMMAND: + if (letoh(config.command) & PCI_CMD_IOSE) + io_enabled = true; + else + io_enabled = false; + + if (letoh(config.command) & PCI_CMD_BME) + bm_enabled = true; + else + bm_enabled = false; + break; } + pkt->result = Packet::Success; + return configDelay; } + Tick IdeController::read(Packet *pkt) { @@ -770,7 +751,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(IdeController) SimObjectParam<System *> system; SimObjectParam<Platform *> platform; - SimObjectParam<PciConfigAll *> configspace; SimObjectParam<PciConfigData *> configdata; Param<uint32_t> pci_bus; Param<uint32_t> pci_dev; @@ -784,7 +764,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(IdeController) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -800,7 +779,6 @@ CREATE_SIM_OBJECT(IdeController) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/ide_ctrl.hh b/src/dev/ide_ctrl.hh index 1d30c8b31..5842d322e 100644 --- a/src/dev/ide_ctrl.hh +++ b/src/dev/ide_ctrl.hh @@ -204,12 +204,8 @@ class IdeController : public PciDev IdeController(Params *p); ~IdeController(); - virtual void writeConfig(int offset, const uint8_t data); - virtual void writeConfig(int offset, const uint16_t data); - virtual void writeConfig(int offset, const uint32_t data); - virtual void readConfig(int offset, uint8_t *data); - virtual void readConfig(int offset, uint16_t *data); - virtual void readConfig(int offset, uint32_t *data); + virtual Tick writeConfig(Packet *pkt); + virtual Tick readConfig(Packet *pkt); void setDmaComplete(IdeDisk *disk); diff --git a/src/dev/ide_disk.cc b/src/dev/ide_disk.cc index dc78021f8..12564ddd0 100644 --- a/src/dev/ide_disk.cc +++ b/src/dev/ide_disk.cc @@ -318,7 +318,7 @@ IdeDisk::doDmaTransfer() panic("Inconsistent DMA transfer state: dmaState = %d devState = %d\n", dmaState, devState); - if (ctrl->dmaPending()) { + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaTransferEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else @@ -398,8 +398,7 @@ IdeDisk::doDmaRead() curPrd.getByteCount(), TheISA::PageBytes); } - if (ctrl->dmaPending()) { - panic("shouldn't be reentant??"); + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaReadWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else if (!dmaReadCG->done()) { @@ -474,8 +473,7 @@ IdeDisk::doDmaWrite() dmaWriteCG = new ChunkGenerator(curPrd.getBaseAddr(), curPrd.getByteCount(), TheISA::PageBytes); } - if (ctrl->dmaPending()) { - panic("shouldn't be reentant??"); + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaWriteWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else if (!dmaWriteCG->done()) { diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index 485216874..660efabfd 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -32,10 +32,12 @@ #include "base/trace.hh" #include "dev/io_device.hh" #include "sim/builder.hh" +#include "sim/system.hh" -PioPort::PioPort(PioDevice *dev, Platform *p) - : Port(dev->name() + "-pioport"), device(dev), platform(p) +PioPort::PioPort(PioDevice *dev, System *s, std::string pname) + : Port(dev->name() + pname), device(dev), sys(s), + outTiming(0), drainEvent(NULL) { } @@ -62,34 +64,68 @@ PioPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) void PioPort::recvRetry() { - Packet* pkt = transmitList.front(); - if (Port::sendTiming(pkt)) { - transmitList.pop_front(); + bool result = true; + while (result && transmitList.size()) { + result = Port::sendTiming(transmitList.front()); + if (result) + transmitList.pop_front(); } + if (transmitList.size() == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } - void PioPort::SendEvent::process() { + port->outTiming--; + assert(port->outTiming >= 0); if (port->Port::sendTiming(packet)) - return; + if (port->transmitList.size() == 0 && port->drainEvent) { + port->drainEvent->process(); + port->drainEvent = NULL; + } + return; port->transmitList.push_back(packet); } +void +PioPort::resendNacked(Packet *pkt) { + pkt->reinitNacked(); + if (transmitList.size()) { + transmitList.push_front(pkt); + } else { + if (!Port::sendTiming(pkt)) + transmitList.push_front(pkt); + } +}; bool PioPort::recvTiming(Packet *pkt) { - Tick latency = device->recvAtomic(pkt); - // turn packet around to go back to requester - pkt->makeTimingResponse(); - sendTiming(pkt, latency); + if (pkt->result == Packet::Nacked) { + resendNacked(pkt); + } else { + Tick latency = device->recvAtomic(pkt); + // turn packet around to go back to requester + pkt->makeTimingResponse(); + sendTiming(pkt, latency); + } return true; } +unsigned int +PioPort::drain(Event *de) +{ + if (outTiming == 0 && transmitList.size() == 0) + return 0; + drainEvent = de; + return 1; +} + PioDevice::~PioDevice() { if (pioPort) @@ -104,6 +140,19 @@ PioDevice::init() pioPort->sendStatusChange(Port::RangeChange); } + +unsigned int +PioDevice::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + void BasicPioDevice::addressRanges(AddrRangeList &range_list) { @@ -113,8 +162,9 @@ BasicPioDevice::addressRanges(AddrRangeList &range_list) } -DmaPort::DmaPort(DmaDevice *dev, Platform *p) - : Port(dev->name() + "-dmaport"), device(dev), platform(p), pendingCount(0) +DmaPort::DmaPort(DmaDevice *dev, System *s) + : Port(dev->name() + "-dmaport"), device(dev), sys(s), pendingCount(0), + actionInProgress(0), drainEvent(NULL) { } bool @@ -144,6 +194,11 @@ DmaPort::recvTiming(Packet *pkt) } delete pkt->req; delete pkt; + + if (pendingCount == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } else { panic("Got packet without sender state... huh?\n"); } @@ -155,6 +210,29 @@ DmaDevice::DmaDevice(Params *p) : PioDevice(p), dmaPort(NULL) { } + +unsigned int +DmaDevice::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de) + dmaPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + +unsigned int +DmaPort::drain(Event *de) +{ + if (pendingCount == 0) + return 0; + drainEvent = de; + return 1; +} + + void DmaPort::recvRetry() { @@ -180,6 +258,8 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, { assert(event); + assert(device->getState() == SimObject::Running); + DmaReqState *reqState = new DmaReqState(event, this, size); for (ChunkGenerator gen(addr, size, peerBlockSize()); @@ -197,51 +277,54 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, pendingCount++; sendDma(pkt); } + } void DmaPort::sendDma(Packet *pkt, bool front) { - // some kind of selction between access methods - // more work is going to have to be done to make - // switching actually work - /* MemState state = device->platform->system->memState; - - if (state == Timing) { */ - DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", - pkt, pkt->getAddr()); - if (transmitList.size() || !sendTiming(pkt)) { - if (front) - transmitList.push_front(pkt); - else - transmitList.push_back(pkt); - DPRINTF(DMA, "-- Failed: queued\n"); - } else { - DPRINTF(DMA, "-- Done\n"); - } - /* } else if (state == Atomic) { - sendAtomic(pkt); - if (pkt->senderState) { - DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState); - assert(state); - state->completionEvent->schedule(curTick + (pkt->time - - pkt->req->getTime()) +1); - delete state; - } - pendingCount--; - assert(pendingCount >= 0); - delete pkt->req; - delete pkt; - - } else if (state == Functional) { - sendFunctional(pkt); - // Is this correct??? - completionEvent->schedule(pkt->req->responseTime - pkt->req->requestTime); - completionEvent == NULL; + // some kind of selction between access methods + // more work is going to have to be done to make + // switching actually work + + System::MemoryMode state = sys->getMemoryMode(); + if (state == System::Timing) { + DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", + pkt, pkt->getAddr()); + if (transmitList.size() || !sendTiming(pkt)) { + if (front) + transmitList.push_front(pkt); + else + transmitList.push_back(pkt); + DPRINTF(DMA, "-- Failed: queued\n"); + } else { + DPRINTF(DMA, "-- Done\n"); + } + } else if (state == System::Atomic) { + Tick lat; + lat = sendAtomic(pkt); + assert(pkt->senderState); + DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState); + assert(state); + + state->numBytes += pkt->req->getSize(); + if (state->totBytes == state->numBytes) { + state->completionEvent->schedule(curTick + lat); + delete state; + delete pkt->req; + } + pendingCount--; + assert(pendingCount >= 0); + delete pkt; + + if (pendingCount == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } + } else panic("Unknown memory command state."); - */ } DmaDevice::~DmaDevice() diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index 195ca0fb7..fa3f98247 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -60,9 +60,9 @@ class PioPort : public Port /** The device that this port serves. */ PioDevice *device; - /** The platform that device/port are in. This is used to select which mode + /** The system that device/port are in. This is used to select which mode * we are currently operating in. */ - Platform *platform; + System *sys; /** A list of outgoing timing response packets that haven't been serviced * yet. */ @@ -82,6 +82,8 @@ class PioPort : public Port virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + void resendNacked(Packet *pkt); + /** * This class is used to implemented sendTiming() with a delay. When a delay * is requested a new event is created. When the event time expires it @@ -104,55 +106,72 @@ class PioPort : public Port friend class PioPort; }; + /** Number of timing requests that are emulating the device timing before + * attempting to end up on the bus. + */ + int outTiming; + + /** If we need to drain, keep the drain event around until we're done + * here.*/ + Event *drainEvent; + /** Schedule a sendTiming() event to be called in the future. */ void sendTiming(Packet *pkt, Tick time) - { new PioPort::SendEvent(this, pkt, time); } + { outTiming++; new PioPort::SendEvent(this, pkt, time); } /** This function is notification that the device should attempt to send a * packet again. */ virtual void recvRetry(); public: - PioPort(PioDevice *dev, Platform *p); + PioPort(PioDevice *dev, System *s, std::string pname = "-pioport"); + + unsigned int drain(Event *de); friend class PioPort::SendEvent; }; -struct DmaReqState : public Packet::SenderState +class DmaPort : public Port { - /** Event to call on the device when this transaction (all packets) - * complete. */ - Event *completionEvent; + protected: + struct DmaReqState : public Packet::SenderState + { + /** Event to call on the device when this transaction (all packets) + * complete. */ + Event *completionEvent; - /** Where we came from for some sanity checking. */ - Port *outPort; + /** Where we came from for some sanity checking. */ + Port *outPort; - /** Total number of bytes that this transaction involves. */ - Addr totBytes; + /** Total number of bytes that this transaction involves. */ + Addr totBytes; - /** Number of bytes that have been acked for this transaction. */ - Addr numBytes; + /** Number of bytes that have been acked for this transaction. */ + Addr numBytes; - bool final; - DmaReqState(Event *ce, Port *p, Addr tb) - : completionEvent(ce), outPort(p), totBytes(tb), numBytes(0) - {} -}; + DmaReqState(Event *ce, Port *p, Addr tb) + : completionEvent(ce), outPort(p), totBytes(tb), numBytes(0) + {} + }; -class DmaPort : public Port -{ - protected: DmaDevice *device; std::list<Packet*> transmitList; - /** The platform that device/port are in. This is used to select which mode + /** The system that device/port are in. This is used to select which mode * we are currently operating in. */ - Platform *platform; + System *sys; /** Number of outstanding packets the dma port has. */ int pendingCount; + /** If a dmaAction is in progress. */ + int actionInProgress; + + /** If we need to drain, keep the drain event around until we're done + * here.*/ + Event *drainEvent; + virtual bool recvTiming(Packet *pkt); virtual Tick recvAtomic(Packet *pkt) { panic("dma port shouldn't be used for pio access."); } @@ -170,13 +189,14 @@ class DmaPort : public Port void sendDma(Packet *pkt, bool front = false); public: - DmaPort(DmaDevice *dev, Platform *p); + DmaPort(DmaDevice *dev, System *s); void dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, uint8_t *data = NULL); bool dmaPending() { return pendingCount > 0; } + unsigned int drain(Event *de); }; /** @@ -195,6 +215,8 @@ class PioDevice : public MemObject * transaction we should perform. */ Platform *platform; + System *sys; + /** The pioPort that handles the requests for us and provides us requests * that it sees. */ PioPort *pioPort; @@ -239,20 +261,22 @@ class PioDevice : public MemObject const Params *params() const { return _params; } PioDevice(Params *p) - : MemObject(p->name), platform(p->platform), pioPort(NULL), - _params(p) + : MemObject(p->name), platform(p->platform), sys(p->system), + pioPort(NULL), _params(p) {} virtual ~PioDevice(); virtual void init(); - virtual Port *getPort(const std::string &if_name) + virtual unsigned int drain(Event *de); + + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) panic("pio port already connected to."); - pioPort = new PioPort(this, params()->platform); + pioPort = new PioPort(this, sys); return pioPort; } else return NULL; @@ -309,17 +333,19 @@ class DmaDevice : public PioDevice bool dmaPending() { return dmaPort->dmaPending(); } - virtual Port *getPort(const std::string &if_name) + virtual unsigned int drain(Event *de); + + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) panic("pio port already connected to."); - pioPort = new PioPort(this, params()->platform); + pioPort = new PioPort(this, sys); return pioPort; } else if (if_name == "dma") { if (dmaPort != NULL) panic("dma port already connected to."); - dmaPort = new DmaPort(this, params()->platform); + dmaPort = new DmaPort(this, sys); return dmaPort; } else return NULL; diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index decffaf73..bf2279d93 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Lisa Hsu + * Authors: Nathan Binkert + * Lisa Hsu */ /** @file @@ -464,11 +465,12 @@ NSGigE::regStats() /** * This is to write to the PCI general configuration registers */ -void -NSGigE::writeConfig(int offset, const uint16_t data) +Tick +NSGigE::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset < PCI_DEVICE_SPECIFIC) - PciDev::writeConfig(offset, data); + PciDev::writeConfig(pkt); else panic("Device specific PCI config space not implemented!\n"); @@ -483,6 +485,8 @@ NSGigE::writeConfig(int offset, const uint16_t data) ioEnable = false; break; } + pkt->result = Packet::Success; + return configDelay; } /** @@ -507,14 +511,7 @@ NSGigE::read(Packet *pkt) if (daddr > LAST && daddr <= RESERVED) { panic("Accessing reserved register"); } else if (daddr > RESERVED && daddr <= 0x3FC) { - if (pkt->getSize() == sizeof(uint8_t)) - readConfig(daddr & 0xff, pkt->getPtr<uint8_t>()); - if (pkt->getSize() == sizeof(uint16_t)) - readConfig(daddr & 0xff, pkt->getPtr<uint16_t>()); - if (pkt->getSize() == sizeof(uint32_t)) - readConfig(daddr & 0xff, pkt->getPtr<uint32_t>()); - pkt->result = Packet::Success; - return pioDelay; + return readConfig(pkt); } else if (daddr >= MIB_START && daddr <= MIB_END) { // don't implement all the MIB's. hopefully the kernel // doesn't actually DEPEND upon their values @@ -732,14 +729,7 @@ NSGigE::write(Packet *pkt) if (daddr > LAST && daddr <= RESERVED) { panic("Accessing reserved register"); } else if (daddr > RESERVED && daddr <= 0x3FC) { - if (pkt->getSize() == sizeof(uint8_t)) - writeConfig(daddr & 0xff, pkt->get<uint8_t>()); - if (pkt->getSize() == sizeof(uint16_t)) - writeConfig(daddr & 0xff, pkt->get<uint16_t>()); - if (pkt->getSize() == sizeof(uint32_t)) - writeConfig(daddr & 0xff, pkt->get<uint32_t>()); - pkt->result = Packet::Success; - return pioDelay; + return writeConfig(pkt); } else if (daddr > 0x3FC) panic("Something is messed up!\n"); @@ -1387,7 +1377,7 @@ NSGigE::doRxDmaRead() assert(rxDmaState == dmaIdle || rxDmaState == dmaReadWaiting); rxDmaState = dmaReading; - if (dmaPending()) + if (dmaPending() || getState() != Running) rxDmaState = dmaReadWaiting; else dmaRead(rxDmaAddr, rxDmaLen, &rxDmaReadEvent, (uint8_t*)rxDmaData); @@ -1418,7 +1408,7 @@ NSGigE::doRxDmaWrite() assert(rxDmaState == dmaIdle || rxDmaState == dmaWriteWaiting); rxDmaState = dmaWriting; - if (dmaPending()) + if (dmaPending() || getState() != Running) rxDmaState = dmaWriteWaiting; else dmaWrite(rxDmaAddr, rxDmaLen, &rxDmaWriteEvent, (uint8_t*)rxDmaData); @@ -1836,7 +1826,7 @@ NSGigE::doTxDmaRead() assert(txDmaState == dmaIdle || txDmaState == dmaReadWaiting); txDmaState = dmaReading; - if (dmaPending()) + if (dmaPending() || getState() != Running) txDmaState = dmaReadWaiting; else dmaRead(txDmaAddr, txDmaLen, &txDmaReadEvent, (uint8_t*)txDmaData); @@ -1867,7 +1857,7 @@ NSGigE::doTxDmaWrite() assert(txDmaState == dmaIdle || txDmaState == dmaWriteWaiting); txDmaState = dmaWriting; - if (dmaPending()) + if (dmaPending() || getState() != Running) txDmaState = dmaWriteWaiting; else dmaWrite(txDmaAddr, txDmaLen, &txDmaWriteEvent, (uint8_t*)txDmaData); @@ -2416,6 +2406,20 @@ NSGigE::recvPacket(EthPacketPtr packet) return true; } + +void +NSGigE::resume() +{ + SimObject::resume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + + //===================================================================== // // @@ -2806,7 +2810,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(NSGigE) SimObjectParam<System *> system; SimObjectParam<Platform *> platform; - SimObjectParam<PciConfigAll *> configspace; SimObjectParam<PciConfigData *> configdata; Param<uint32_t> pci_bus; Param<uint32_t> pci_dev; @@ -2840,7 +2843,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(NSGigE) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -2878,7 +2880,6 @@ CREATE_SIM_OBJECT(NSGigE) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh index 2de11c951..080c0b1f3 100644 --- a/src/dev/ns_gige.hh +++ b/src/dev/ns_gige.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Lisa Hsu + * Authors: Nathan Binkert + * Lisa Hsu */ /** @file @@ -113,7 +114,6 @@ struct dp_rom { class NSGigEInt; class Packet; -class PciConfigAll; /** * NS DP83820 Ethernet device model @@ -375,7 +375,7 @@ class NSGigE : public PciDev ~NSGigE(); const Params *params() const { return (const Params *)_params; } - virtual void writeConfig(int offset, const uint16_t data); + virtual Tick writeConfig(Packet *pkt); virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); @@ -391,6 +391,8 @@ class NSGigE : public PciDev virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + virtual void resume(); + public: void regStats(); diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc index 785774ff4..68013eab8 100644 --- a/src/dev/pciconfigall.cc +++ b/src/dev/pciconfigall.cc @@ -33,14 +33,8 @@ * PCI Configspace implementation */ -#include <deque> -#include <string> -#include <vector> -#include <bitset> - #include "base/trace.hh" #include "dev/pciconfigall.hh" -#include "dev/pcidev.hh" #include "dev/pcireg.h" #include "dev/platform.hh" #include "mem/packet.hh" @@ -50,151 +44,61 @@ using namespace std; PciConfigAll::PciConfigAll(Params *p) - : BasicPioDevice(p) + : PioDevice(p) { - pioSize = 0xffffff; - - // Set backpointer for pci config. Really the config stuff should be able to - // automagically do this - p->platform->pciconfig = this; - - // Make all the pointers to devices null - for(int x=0; x < MAX_PCI_DEV; x++) - for(int y=0; y < MAX_PCI_FUNC; y++) - devices[x][y] = NULL; + pioAddr = p->platform->calcConfigAddr(params()->bus,0,0); } -// If two interrupts share the same line largely bad things will happen. -// Since we don't track how many times an interrupt was set and correspondingly -// cleared two devices on the same interrupt line and assert and deassert each -// others interrupt "line". Interrupts will not work correctly. -void -PciConfigAll::startup() -{ - bitset<256> intLines; - PciDev *tempDev; - uint8_t intline; - - for (int x = 0; x < MAX_PCI_DEV; x++) { - for (int y = 0; y < MAX_PCI_FUNC; y++) { - if (devices[x][y] != NULL) { - tempDev = devices[x][y]; - intline = tempDev->interruptLine(); - if (intLines.test(intline)) - warn("Interrupt line %#X is used multiple times" - "(You probably want to fix this).\n", (uint32_t)intline); - else - intLines.set(intline); - } // devices != NULL - } // PCI_FUNC - } // PCI_DEV - -} Tick PciConfigAll::read(Packet *pkt) { assert(pkt->result == Packet::Unknown); - assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); - - Addr daddr = pkt->getAddr() - pioAddr; - int device = (daddr >> 11) & 0x1F; - int func = (daddr >> 8) & 0x7; - int reg = daddr & 0xFF; pkt->allocate(); - DPRINTF(PciConfigAll, "read va=%#x da=%#x size=%d\n", pkt->getAddr(), daddr, + DPRINTF(PciConfigAll, "read va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); switch (pkt->getSize()) { case sizeof(uint32_t): - if (devices[device][func] == NULL) - pkt->set<uint32_t>(0xFFFFFFFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr<uint32_t>()); + pkt->set<uint32_t>(0xFFFFFFFF); break; case sizeof(uint16_t): - if (devices[device][func] == NULL) - pkt->set<uint16_t>(0xFFFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr<uint16_t>()); + pkt->set<uint16_t>(0xFFFF); break; case sizeof(uint8_t): - if (devices[device][func] == NULL) - pkt->set<uint8_t>(0xFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr<uint8_t>()); + pkt->set<uint8_t>(0xFF); break; default: panic("invalid access size(?) for PCI configspace!\n"); } pkt->result = Packet::Success; - return pioDelay; + return params()->pio_delay; } Tick PciConfigAll::write(Packet *pkt) { assert(pkt->result == Packet::Unknown); - assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); - assert(pkt->getSize() == sizeof(uint8_t) || pkt->getSize() == sizeof(uint16_t) || - pkt->getSize() == sizeof(uint32_t)); - Addr daddr = pkt->getAddr() - pioAddr; - - int device = (daddr >> 11) & 0x1F; - int func = (daddr >> 8) & 0x7; - int reg = daddr & 0xFF; - - if (devices[device][func] == NULL) - panic("Attempting to write to config space on non-existant device\n"); - - DPRINTF(PciConfigAll, "write - va=%#x size=%d data=%#x\n", - pkt->getAddr(), pkt->getSize(), pkt->get<uint32_t>()); - - switch (pkt->getSize()) { - case sizeof(uint8_t): - devices[device][func]->writeConfig(reg, pkt->get<uint8_t>()); - break; - case sizeof(uint16_t): - devices[device][func]->writeConfig(reg, pkt->get<uint16_t>()); - break; - case sizeof(uint32_t): - devices[device][func]->writeConfig(reg, pkt->get<uint32_t>()); - break; - default: - panic("invalid pci config write size\n"); - } - pkt->result = Packet::Success; - return pioDelay; + panic("Attempting to write to config space on non-existant device\n"); } void -PciConfigAll::serialize(std::ostream &os) +PciConfigAll::addressRanges(AddrRangeList &range_list) { - /* - * There is no state associated with this object that requires - * serialization. The only real state are the device pointers - * which are all setup by the constructor of the PciDev class - */ + range_list.clear(); + range_list.push_back(RangeSize(pioAddr, params()->size)); } -void -PciConfigAll::unserialize(Checkpoint *cp, const std::string §ion) -{ - /* - * There is no state associated with this object that requires - * serialization. The only real state are the device pointers - * which are all setup by the constructor of the PciDev class - */ -} #ifndef DOXYGEN_SHOULD_SKIP_THIS BEGIN_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll) - Param<Addr> pio_addr; Param<Tick> pio_latency; + Param<int> bus; + Param<Addr> size; SimObjectParam<Platform *> platform; SimObjectParam<System *> system; @@ -202,8 +106,9 @@ END_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll) BEGIN_INIT_SIM_OBJECT_PARAMS(PciConfigAll) - INIT_PARAM(pio_addr, "Device Address"), INIT_PARAM(pio_latency, "Programmed IO latency"), + INIT_PARAM(bus, "Bus that this object handles config space for"), + INIT_PARAM(size, "The size of config space"), INIT_PARAM(platform, "platform"), INIT_PARAM(system, "system object") @@ -211,11 +116,13 @@ END_INIT_SIM_OBJECT_PARAMS(PciConfigAll) CREATE_SIM_OBJECT(PciConfigAll) { - BasicPioDevice::Params *p = new BasicPioDevice::Params; - p->pio_addr = pio_addr; + PciConfigAll::Params *p = new PciConfigAll::Params; p->pio_delay = pio_latency; p->platform = platform; p->system = system; + p->bus = bus; + p->size = size; + return new PciConfigAll(p); } diff --git a/src/dev/pciconfigall.hh b/src/dev/pciconfigall.hh index e60fd949b..07eaf8112 100644 --- a/src/dev/pciconfigall.hh +++ b/src/dev/pciconfigall.hh @@ -42,11 +42,6 @@ #include "dev/io_device.hh" -static const uint32_t MAX_PCI_DEV = 32; -static const uint32_t MAX_PCI_FUNC = 8; - -class PciDev; - /** * PCI Config Space * All of PCI config space needs to return -1 on Tsunami, except @@ -54,16 +49,17 @@ class PciDev; * space and passes the requests on to TsunamiPCIDev devices as * appropriate. */ -class PciConfigAll : public BasicPioDevice +class PciConfigAll : public PioDevice { - private: - /** - * Pointers to all the devices that are registered with this - * particular config space. - */ - PciDev* devices[MAX_PCI_DEV][MAX_PCI_FUNC]; - public: + struct Params : public PioDevice::Params + { + Tick pio_delay; + Addr size; + int bus; + }; + const Params *params() const { return (const Params *)_params; } + /** * Constructor for PCIConfigAll * @param p parameters structure @@ -71,28 +67,10 @@ class PciConfigAll : public BasicPioDevice PciConfigAll(Params *p); /** - * Check if a device exists. - * @param pcidev PCI device to check - * @param pcifunc PCI function to check - * @return true if device exists, false otherwise - */ - bool deviceExists(uint32_t pcidev, uint32_t pcifunc) - { return devices[pcidev][pcifunc] != NULL ? true : false; } - - /** - * Registers a device with the config space object. - * @param pcidev PCI device to register - * @param pcifunc PCI function to register - * @param device device to register - */ - void registerDevice(uint8_t pcidev, uint8_t pcifunc, PciDev *device) - { devices[pcidev][pcifunc] = device; } - - /** * Read something in PCI config space. If the device does not exist * -1 is returned, if the device does exist its PciDev::ReadConfig (or the * virtual function that overrides) it is called. - * @param pkt Contains the address of the field to read. + * @param pkt Contains information about the read operation * @return Amount of time to do the read */ virtual Tick read(Packet *pkt); @@ -101,31 +79,17 @@ class PciConfigAll : public BasicPioDevice * Write to PCI config spcae. If the device does not exit the simulator * panics. If it does it is passed on the PciDev::WriteConfig (or the virtual * function that overrides it). - * @param req Contains the address to write to. - * @param data The data to write. - * @return The fault condition of the access. + * @param pkt Contains information about the write operation + * @return Amount of time to do the read */ virtual Tick write(Packet *pkt); - /** - * Start up function to check if more than one person is using an interrupt line - * and print a warning if such a case exists - */ - virtual void startup(); + void addressRanges(AddrRangeList &range_list); - /** - * Serialize this object to the given output stream. - * @param os The stream to serialize to. - */ - virtual void serialize(std::ostream &os); + private: + Addr pioAddr; - /** - * Reconstruct the state of this object from a checkpoint. - * @param cp The checkpoint use. - * @param section The section name of this object - */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); }; #endif // __PCICONFIGALL_HH__ diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index f8db2efbc..e81e0d1ee 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -53,201 +53,268 @@ using namespace std; -PciDev::PciDev(Params *p) - : DmaDevice(p), plat(p->platform), configData(p->configData), - pioDelay(p->pio_delay) -{ - // copy the config data from the PciConfigData object - if (configData) { - memcpy(config.data, configData->config.data, sizeof(config.data)); - memcpy(BARSize, configData->BARSize, sizeof(BARSize)); - memcpy(BARAddrs, configData->BARAddrs, sizeof(BARAddrs)); - } else - panic("NULL pointer to configuration data"); - // Setup pointer in config space to point to this entry - if (p->configSpace->deviceExists(p->deviceNum, p->functionNum)) - panic("Two PCI devices occuping same dev: %#x func: %#x", - p->deviceNum, p->functionNum); - else - p->configSpace->registerDevice(p->deviceNum, p->functionNum, this); -} - -void -PciDev::readConfig(int offset, uint8_t *data) +PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid, + int funcid, Platform *p) + : PioPort(dev,p->system,"-pciconf"), device(dev), platform(p), + busId(busid), deviceId(devid), functionId(funcid) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); + configAddr = platform->calcConfigAddr(busId, deviceId, functionId); +} - *data = config.data[offset]; - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); +Tick +PciDev::PciConfigPort::recvAtomic(Packet *pkt) +{ + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + return device->recvConfig(pkt); } void -PciDev::addressRanges(AddrRangeList &range_list) +PciDev::PciConfigPort::recvFunctional(Packet *pkt) { - int x = 0; - range_list.clear(); - for (x = 0; x < 6; x++) - if (BARAddrs[x] != 0) - range_list.push_back(RangeSize(BARAddrs[x],BARSize[x])); + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + device->recvConfig(pkt); } void -PciDev::readConfig(int offset, uint16_t *data) +PciDev::PciConfigPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); + snoop.clear(); + resp.push_back(RangeSize(configAddr, PCI_CONFIG_SIZE+1)); +} - *data = *(uint16_t*)&config.data[offset]; - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); +bool +PciDev::PciConfigPort::recvTiming(Packet *pkt) +{ + if (pkt->result == Packet::Nacked) { + resendNacked(pkt); + } else { + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + Tick latency = device->recvConfig(pkt); + // turn packet around to go back to requester + pkt->makeTimingResponse(); + sendTiming(pkt, latency); + } + return true; } -void -PciDev::readConfig(int offset, uint32_t *data) +PciDev::PciDev(Params *p) + : DmaDevice(p), plat(p->platform), configData(p->configData), + pioDelay(p->pio_delay), configDelay(p->config_delay), + configPort(NULL) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); + // copy the config data from the PciConfigData object + if (configData) { + memcpy(config.data, configData->config.data, sizeof(config.data)); + memcpy(BARSize, configData->BARSize, sizeof(BARSize)); + memcpy(BARAddrs, configData->BARAddrs, sizeof(BARAddrs)); + } else + panic("NULL pointer to configuration data"); - *data = *(uint32_t*)&config.data[offset]; + plat->registerPciDevice(0, p->deviceNum, p->functionNum, + letoh(configData->config.interruptLine)); +} - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); +void +PciDev::init() +{ + if (!configPort) + panic("pci config port not connected to anything!"); + configPort->sendStatusChange(Port::RangeChange); + PioDevice::init(); } +unsigned int +PciDev::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de) + dmaPort->drain(de) + configPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} -void -PciDev::writeConfig(int offset, const uint8_t data) +Tick +PciDev::readConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset >= PCI_DEVICE_SPECIFIC) panic("Device specific PCI config space not implemented!\n"); - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 1 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI0_INTERRUPT_LINE: - config.interruptLine = data; - case PCI_CACHE_LINE_SIZE: - config.cacheLineSize = data; - case PCI_LATENCY_TIMER: - config.latencyTimer = data; + pkt->allocate(); + + switch (pkt->getSize()) { + case sizeof(uint8_t): + pkt->set<uint8_t>(config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint8_t>()); break; - /* Do nothing for these read-only registers */ - case PCI0_INTERRUPT_PIN: - case PCI0_MINIMUM_GRANT: - case PCI0_MAXIMUM_LATENCY: - case PCI_CLASS_CODE: - case PCI_REVISION_ID: + case sizeof(uint16_t): + pkt->set<uint16_t>(*(uint16_t*)&config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint16_t>()); + break; + case sizeof(uint32_t): + pkt->set<uint32_t>(*(uint32_t*)&config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint32_t>()); break; default: - panic("writing to a read only register"); + panic("invalid access size(?) for PCI configspace!\n"); } + pkt->result = Packet::Success; + return configDelay; + } void -PciDev::writeConfig(int offset, const uint16_t data) +PciDev::addressRanges(AddrRangeList &range_list) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); - - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 2 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI_COMMAND: - config.command = data; - case PCI_STATUS: - config.status = data; - case PCI_CACHE_LINE_SIZE: - config.cacheLineSize = data; - break; - default: - panic("writing to a read only register"); - } + int x = 0; + range_list.clear(); + for (x = 0; x < 6; x++) + if (BARAddrs[x] != 0) + range_list.push_back(RangeSize(BARAddrs[x],BARSize[x])); } - -void -PciDev::writeConfig(int offset, const uint32_t data) +Tick +PciDev::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset >= PCI_DEVICE_SPECIFIC) panic("Device specific PCI config space not implemented!\n"); - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 4 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI0_BASE_ADDR0: - case PCI0_BASE_ADDR1: - case PCI0_BASE_ADDR2: - case PCI0_BASE_ADDR3: - case PCI0_BASE_ADDR4: - case PCI0_BASE_ADDR5: - - uint32_t barnum, bar_mask; - Addr base_addr, base_size, space_base; - - barnum = BAR_NUMBER(offset); - - if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) { - bar_mask = BAR_IO_MASK; - space_base = TSUNAMI_PCI0_IO; - } else { - bar_mask = BAR_MEM_MASK; - space_base = TSUNAMI_PCI0_MEMORY; + switch (pkt->getSize()) { + case sizeof(uint8_t): + switch (offset) { + case PCI0_INTERRUPT_LINE: + config.interruptLine = pkt->get<uint8_t>(); + case PCI_CACHE_LINE_SIZE: + config.cacheLineSize = pkt->get<uint8_t>(); + case PCI_LATENCY_TIMER: + config.latencyTimer = pkt->get<uint8_t>(); + break; + /* Do nothing for these read-only registers */ + case PCI0_INTERRUPT_PIN: + case PCI0_MINIMUM_GRANT: + case PCI0_MAXIMUM_LATENCY: + case PCI_CLASS_CODE: + case PCI_REVISION_ID: + break; + default: + panic("writing to a read only register"); } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint8_t>()); + break; + case sizeof(uint16_t): + switch (offset) { + case PCI_COMMAND: + config.command = pkt->get<uint8_t>(); + case PCI_STATUS: + config.status = pkt->get<uint8_t>(); + case PCI_CACHE_LINE_SIZE: + config.cacheLineSize = pkt->get<uint8_t>(); + break; + default: + panic("writing to a read only register"); + } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint16_t>()); + break; + case sizeof(uint32_t): + switch (offset) { + case PCI0_BASE_ADDR0: + case PCI0_BASE_ADDR1: + case PCI0_BASE_ADDR2: + case PCI0_BASE_ADDR3: + case PCI0_BASE_ADDR4: + case PCI0_BASE_ADDR5: + + uint32_t barnum, bar_mask; + Addr base_addr, base_size, space_base; + + barnum = BAR_NUMBER(offset); + + if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) { + bar_mask = BAR_IO_MASK; + space_base = TSUNAMI_PCI0_IO; + } else { + bar_mask = BAR_MEM_MASK; + space_base = TSUNAMI_PCI0_MEMORY; + } - // Writing 0xffffffff to a BAR tells the card to set the - // value of the bar to size of memory it needs - if (letoh(data) == 0xffffffff) { - // This is I/O Space, bottom two bits are read only - - config.baseAddr[barnum] = letoh( - (~(BARSize[barnum] - 1) & ~bar_mask) | + // Writing 0xffffffff to a BAR tells the card to set the + // value of the bar to size of memory it needs + if (letoh(pkt->get<uint32_t>()) == 0xffffffff) { + // This is I/O Space, bottom two bits are read only + + config.baseAddr[barnum] = letoh( + (~(BARSize[barnum] - 1) & ~bar_mask) | + (letoh(config.baseAddr[barnum]) & bar_mask)); + } else { + config.baseAddr[barnum] = letoh( + (letoh(pkt->get<uint32_t>()) & ~bar_mask) | (letoh(config.baseAddr[barnum]) & bar_mask)); - } else { - config.baseAddr[barnum] = letoh( - (letoh(data) & ~bar_mask) | - (letoh(config.baseAddr[barnum]) & bar_mask)); - if (letoh(config.baseAddr[barnum]) & ~bar_mask) { - base_addr = (letoh(data) & ~bar_mask) + space_base; - base_size = BARSize[barnum]; - BARAddrs[barnum] = base_addr; + if (letoh(config.baseAddr[barnum]) & ~bar_mask) { + base_addr = (letoh(pkt->get<uint32_t>()) & ~bar_mask) + space_base; + base_size = BARSize[barnum]; + BARAddrs[barnum] = base_addr; - pioPort->sendStatusChange(Port::RangeChange); + pioPort->sendStatusChange(Port::RangeChange); + } } + break; + + case PCI0_ROM_BASE_ADDR: + if (letoh(pkt->get<uint32_t>()) == 0xfffffffe) + config.expansionROM = htole((uint32_t)0xffffffff); + else + config.expansionROM = pkt->get<uint32_t>(); + break; + + case PCI_COMMAND: + // This could also clear some of the error bits in the Status + // register. However they should never get set, so lets ignore + // it for now + config.command = pkt->get<uint32_t>(); + break; + + default: + DPRINTF(PCIDEV, "Writing to a read only register"); } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint32_t>()); break; - - case PCI0_ROM_BASE_ADDR: - if (letoh(data) == 0xfffffffe) - config.expansionROM = htole((uint32_t)0xffffffff); - else - config.expansionROM = data; - break; - - case PCI_COMMAND: - // This could also clear some of the error bits in the Status - // register. However they should never get set, so lets ignore - // it for now - config.command = data; - break; - default: - DPRINTF(PCIDEV, "Writing to a read only register"); + panic("invalid access size(?) for PCI configspace!\n"); } + pkt->result = Packet::Success; + return configDelay; + } void diff --git a/src/dev/pcidev.hh b/src/dev/pcidev.hh index 92786427b..847fb07d0 100644 --- a/src/dev/pcidev.hh +++ b/src/dev/pcidev.hh @@ -47,8 +47,6 @@ #define BAR_IO_SPACE(x) ((x) & BAR_IO_SPACE_BIT) #define BAR_NUMBER(x) (((x) - PCI0_BASE_ADDR0) >> 0x2); -class PciConfigAll; - /** * This class encapulates the first 64 bytes of a singles PCI @@ -78,24 +76,43 @@ class PciConfigData : public SimObject Addr BARAddrs[6]; }; + /** * PCI device, base implemnation is only config space. - * Each device is connected to a PCIConfigSpace device - * which returns -1 for everything but the pcidevs that - * register with it. This object registers with the PCIConfig space - * object. */ class PciDev : public DmaDevice { - public: - struct Params : public ::PioDevice::Params + class PciConfigPort : public PioPort { - /** - * A pointer to the configspace all object that calls us when - * a read comes to this particular device/function. - */ - PciConfigAll *configSpace; + protected: + PciDev *device; + + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt) ; + + virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + + Platform *platform; + + int busId; + int deviceId; + int functionId; + + Addr configAddr; + + public: + PciConfigPort(PciDev *dev, int busid, int devid, int funcid, + Platform *p); + friend class PioPort::SendEvent; + }; + + public: + struct Params : public PioDevice::Params + { /** * A pointer to the object that contains the first 64 bytes of * config space @@ -113,6 +130,9 @@ class PciDev : public DmaDevice /** The latency for pio accesses. */ Tick pio_delay; + + /** The latency for a config access. */ + Tick config_delay; }; public: @@ -164,6 +184,25 @@ class PciDev : public DmaDevice Platform *plat; PciConfigData *configData; Tick pioDelay; + Tick configDelay; + PciConfigPort *configPort; + + /** + * Write to the PCI config space data that is stored locally. This may be + * overridden by the device but at some point it will eventually call this + * for normal operations that it does not need to override. + * @param pkt packet containing the write the offset into config space + */ + virtual Tick writeConfig(Packet *pkt); + + + /** + * Read from the PCI config space data that is stored locally. This may be + * overridden by the device but at some point it will eventually call this + * for normal operations that it does not need to override. + * @param pkt packet containing the write the offset into config space + */ + virtual Tick readConfig(Packet *pkt); public: Addr pciToDma(Addr pciAddr) const @@ -171,21 +210,25 @@ class PciDev : public DmaDevice void intrPost() - { plat->postPciInt(configData->config.interruptLine); } + { plat->postPciInt(letoh(configData->config.interruptLine)); } void intrClear() - { plat->clearPciInt(configData->config.interruptLine); } + { plat->clearPciInt(letoh(configData->config.interruptLine)); } uint8_t interruptLine() - { return configData->config.interruptLine; } + { return letoh(configData->config.interruptLine); } /** return the address ranges that this device responds to. * @params range_list range list to populate with ranges */ void addressRanges(AddrRangeList &range_list); + /** Do a PCI Configspace memory access. */ + Tick recvConfig(Packet *pkt) + { return pkt->isRead() ? readConfig(pkt) : writeConfig(pkt); } + /** * Constructor for PCI Dev. This function copies data from the * config file object PCIConfigData and registers the device with @@ -193,30 +236,7 @@ class PciDev : public DmaDevice */ PciDev(Params *params); - /** - * Write to the PCI config space data that is stored locally. This may be - * overridden by the device but at some point it will eventually call this - * for normal operations that it does not need to override. - * @param offset the offset into config space - * @param size the size of the write - * @param data the data to write - */ - virtual void writeConfig(int offset, const uint8_t data); - virtual void writeConfig(int offset, const uint16_t data); - virtual void writeConfig(int offset, const uint32_t data); - - - /** - * Read from the PCI config space data that is stored locally. This may be - * overridden by the device but at some point it will eventually call this - * for normal operations that it does not need to override. - * @param offset the offset into config space - * @param size the size of the read - * @param data pointer to the location where the read value should be stored - */ - virtual void readConfig(int offset, uint8_t *data); - virtual void readConfig(int offset, uint16_t *data); - virtual void readConfig(int offset, uint32_t *data); + virtual void init(); /** * Serialize this object to the given output stream. @@ -230,5 +250,22 @@ class PciDev : public DmaDevice * @param section The section name of this object */ virtual void unserialize(Checkpoint *cp, const std::string §ion); + + + virtual unsigned int drain(Event *de); + + virtual Port *getPort(const std::string &if_name, int idx = -1) + { + if (if_name == "config") { + if (configPort != NULL) + panic("pciconfig port already connected to."); + configPort = new PciConfigPort(this, params()->busNum, + params()->deviceNum, params()->functionNum, + params()->platform); + return configPort; + } + return DmaDevice::getPort(if_name, idx); + } + }; #endif // __DEV_PCIDEV_HH__ diff --git a/src/dev/pcireg.h b/src/dev/pcireg.h index 0aa4ba8ef..a48abd4fa 100644 --- a/src/dev/pcireg.h +++ b/src/dev/pcireg.h @@ -142,6 +142,7 @@ union PCIConfig { // Device specific offsets #define PCI_DEVICE_SPECIFIC 0x40 // 192 bytes +#define PCI_CONFIG_SIZE 0xFF // Some Vendor IDs #define PCI_VENDOR_DEC 0x1011 diff --git a/src/dev/platform.cc b/src/dev/platform.cc index ed021e3b6..8546b7805 100644 --- a/src/dev/platform.cc +++ b/src/dev/platform.cc @@ -63,5 +63,21 @@ Platform::pciToDma(Addr pciAddr) const panic("No PCI dma support in platform."); } +void +Platform::registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, uint8_t intr) +{ + uint32_t bdf = bus << 16 | dev << 8 | func << 0; + if (pciDevices.find(bdf) != pciDevices.end()) + fatal("Two PCI devices have same bus:device:function\n"); + + if (intLines.test(intr)) + fatal("Two PCI devices have same interrupt line: %d\n", intr); + + pciDevices.insert(bdf); + + intLines.set(intr); +} + + DEFINE_SIM_OBJECT_CLASS_NAME("Platform", Platform) diff --git a/src/dev/platform.hh b/src/dev/platform.hh index bfc229748..1940dcad6 100644 --- a/src/dev/platform.hh +++ b/src/dev/platform.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Andrew Schultz + * Nathan Binkert */ /** @@ -36,6 +37,9 @@ #ifndef __DEV_PLATFORM_HH__ #define __DEV_PLATFORM_HH__ +#include <bitset> +#include <set> + #include "sim/sim_object.hh" #include "arch/isa_traits.hh" @@ -51,9 +55,6 @@ class Platform : public SimObject /** Pointer to the interrupt controller */ IntrControl *intrctrl; - /** Pointer to the PCI configuration space */ - PciConfigAll *pciconfig; - /** Pointer to the UART, set by the uart */ Uart *uart; @@ -63,13 +64,20 @@ class Platform : public SimObject public: Platform(const std::string &name, IntrControl *intctrl); virtual ~Platform(); - virtual void init() { if (pciconfig == NULL) panic("PCI Config not set"); } virtual void postConsoleInt() = 0; virtual void clearConsoleInt() = 0; virtual Tick intrFrequency() = 0; virtual void postPciInt(int line); virtual void clearPciInt(int line); virtual Addr pciToDma(Addr pciAddr) const; + virtual Addr calcConfigAddr(int bus, int dev, int func) = 0; + virtual void registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, + uint8_t intr); + + private: + std::bitset<256> intLines; + std::set<uint32_t> pciDevices; + }; #endif // __DEV_PLATFORM_HH__ diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc index a0223733b..815cecca5 100644 --- a/src/dev/sinic.cc +++ b/src/dev/sinic.cc @@ -37,7 +37,6 @@ #include "cpu/intr_control.hh" #include "dev/etherlink.hh" #include "dev/sinic.hh" -#include "dev/pciconfigall.hh" #include "mem/packet.hh" #include "sim/builder.hh" #include "sim/debug.hh" @@ -922,7 +921,7 @@ Device::rxKick() break; case rxBeginCopy: - if (dmaPending()) + if (dmaPending() || getState() != Running) goto exit; rxDmaAddr = params()->platform->pciToDma( @@ -1110,7 +1109,7 @@ Device::txKick() break; case txBeginCopy: - if (dmaPending()) + if (dmaPending() || getState() != Running) goto exit; txDmaAddr = params()->platform->pciToDma( @@ -1288,6 +1287,18 @@ Device::recvPacket(EthPacketPtr packet) return true; } +void +Device::resume() +{ + SimObject::resume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + //===================================================================== // // @@ -1623,7 +1634,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Device) SimObjectParam<System *> system; SimObjectParam<Platform *> platform; - SimObjectParam<PciConfigAll *> configspace; SimObjectParam<PciConfigData *> configdata; Param<uint32_t> pci_bus; Param<uint32_t> pci_dev; @@ -1666,7 +1676,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Device) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -1711,7 +1720,6 @@ CREATE_SIM_OBJECT(Device) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/sinic.hh b/src/dev/sinic.hh index f6c229039..eece4ba6b 100644 --- a/src/dev/sinic.hh +++ b/src/dev/sinic.hh @@ -266,6 +266,7 @@ class Device : public Base public: virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); + virtual void resume(); void prepareIO(int cpu, int index); void prepareRead(int cpu, int index); diff --git a/src/dev/tsunami.cc b/src/dev/tsunami.cc index c9e15581d..8e740a72f 100644 --- a/src/dev/tsunami.cc +++ b/src/dev/tsunami.cc @@ -95,6 +95,13 @@ Tsunami::pciToDma(Addr pciAddr) const return pchip->translatePciToDma(pciAddr); } + +Addr +Tsunami::calcConfigAddr(int bus, int dev, int func) +{ + return pchip->calcConfigAddr(bus, dev, func); +} + void Tsunami::serialize(std::ostream &os) { diff --git a/src/dev/tsunami.hh b/src/dev/tsunami.hh index 13fc4417c..8bb66e914 100644 --- a/src/dev/tsunami.hh +++ b/src/dev/tsunami.hh @@ -113,9 +113,15 @@ class Tsunami : public Platform */ virtual void clearPciInt(int line); + virtual Addr pciToDma(Addr pciAddr) const; /** + * Calculate the configuration address given a bus/dev/func. + */ + virtual Addr calcConfigAddr(int bus, int dev, int func); + + /** * Serialize this object to the given output stream. * @param os The stream to serialize to. */ diff --git a/src/dev/tsunami_pchip.cc b/src/dev/tsunami_pchip.cc index a376b908d..8a542b9b0 100644 --- a/src/dev/tsunami_pchip.cc +++ b/src/dev/tsunami_pchip.cc @@ -302,6 +302,17 @@ TsunamiPChip::translatePciToDma(Addr busAddr) // if no match was found, then return the original address return busAddr; } +Addr +TsunamiPChip::calcConfigAddr(int bus, int dev, int func) +{ + assert(func < 8); + assert(dev < 32); + assert(bus == 0); + + return TsunamiPciBus0Config | (func << 8) | (dev << 11); +} + + void TsunamiPChip::serialize(std::ostream &os) diff --git a/src/dev/tsunami_pchip.hh b/src/dev/tsunami_pchip.hh index 9f80f7d68..2c97a1fea 100644 --- a/src/dev/tsunami_pchip.hh +++ b/src/dev/tsunami_pchip.hh @@ -45,6 +45,9 @@ class TsunamiPChip : public BasicPioDevice { protected: + + static const Addr TsunamiPciBus0Config = ULL(0x801fe000000); + /** Pchip control register */ uint64_t pctl; @@ -80,6 +83,8 @@ class TsunamiPChip : public BasicPioDevice */ Addr translatePciToDma(Addr busAddr); + Addr calcConfigAddr(int bus, int dev, int func); + virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc index 5ff7e26db..289ece5ce 100644 --- a/src/kern/linux/events.cc +++ b/src/kern/linux/events.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #include "arch/arguments.hh" diff --git a/src/kern/linux/events.hh b/src/kern/linux/events.hh index 65f794a9c..b0510c18f 100644 --- a/src/kern/linux/events.hh +++ b/src/kern/linux/events.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #ifndef __KERN_LINUX_EVENTS_HH__ diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh index af5e23b95..e3f554a22 100644 --- a/src/kern/linux/linux.hh +++ b/src/kern/linux/linux.hh @@ -69,7 +69,7 @@ class Linux { typedef uint32_t gid_t; //@} -#if BSD_HOST +#if NO_STAT64 typedef struct stat hst_stat; typedef struct stat hst_stat64; #else @@ -176,7 +176,7 @@ class Linux { /// Helper function to convert a host stat buffer to a target stat /// buffer. Also copies the target buffer out to the simulated /// memory space. Used by stat(), fstat(), and lstat(). -#if !BSD_HOST +#if !NO_STAT64 static void copyOutStatBuf(TranslatingPort *mem, Addr addr, hst_stat *host) { diff --git a/src/kern/linux/printk.cc b/src/kern/linux/printk.cc index e39a15982..004d1be2f 100644 --- a/src/kern/linux/printk.cc +++ b/src/kern/linux/printk.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #include <sys/types.h> diff --git a/src/kern/linux/printk.hh b/src/kern/linux/printk.hh index f9203717a..5ddf0a018 100644 --- a/src/kern/linux/printk.hh +++ b/src/kern/linux/printk.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #ifndef __PRINTK_HH__ diff --git a/src/kern/solaris/solaris.hh b/src/kern/solaris/solaris.hh index 0fec0bcce..b819fb6d2 100644 --- a/src/kern/solaris/solaris.hh +++ b/src/kern/solaris/solaris.hh @@ -74,7 +74,7 @@ class Solaris { typedef uint32_t nlink_t; //@} -#if BSD_HOST +#if NO_STAT64 typedef struct stat hst_stat; typedef struct stat hst_stat64; #else @@ -177,7 +177,7 @@ class Solaris { /// Helper function to convert a host stat buffer to a target stat /// buffer. Also copies the target buffer out to the simulated /// memory space. Used by stat(), fstat(), and lstat(). -#if !BSD_HOST +#if !NO_STAT64 static void copyOutStatBuf(TranslatingPort *mem, Addr addr, hst_stat *host) { diff --git a/src/kern/system_events.hh b/src/kern/system_events.hh index ccd6bd9a4..93b5eb528 100644 --- a/src/kern/system_events.hh +++ b/src/kern/system_events.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Lisa Hsu + * Authors: Nathan Binkert + * Lisa Hsu * Ali Saidi */ diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 3718cbaaf..29ea2e12f 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -59,7 +59,7 @@ Bridge::Bridge(const std::string &n, int qsa, int qsb, } Port * -Bridge::getPort(const std::string &if_name) +Bridge::getPort(const std::string &if_name, int idx) { BridgePort *port; diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index 37fb92662..b3525d3e0 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -177,7 +177,7 @@ class Bridge : public MemObject public: /** A function used to return the port associated with this bus object. */ - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); virtual void init(); diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 919acd23c..31271106b 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -33,13 +33,22 @@ */ +#include "base/misc.hh" #include "base/trace.hh" #include "mem/bus.hh" #include "sim/builder.hh" Port * -Bus::getPort(const std::string &if_name) +Bus::getPort(const std::string &if_name, int idx) { + if (if_name == "default") + if (defaultPort == NULL) { + defaultPort = new BusPort(csprintf("%s-default",name()), this, + defaultId); + return defaultPort; + } else + fatal("Default port already set\n"); + // if_name ignored? forced to be empty? int id = interfaces.size(); BusPort *bp = new BusPort(csprintf("%s-p%d", name(), id), this, id); @@ -47,11 +56,12 @@ Bus::getPort(const std::string &if_name) return bp; } -/** Get the ranges of anyone that we are connected to. */ +/** Get the ranges of anyone other buses that we are connected to. */ void Bus::init() { std::vector<Port*>::iterator intIter; + for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++) (*intIter)->sendStatusChange(Port::RangeChange); } @@ -110,6 +120,7 @@ Bus::findPort(Addr addr, int id) int dest_id = -1; int i = 0; bool found = false; + AddrRangeIter iter; while (i < portList.size() && !found) { @@ -120,8 +131,18 @@ Bus::findPort(Addr addr, int id) } i++; } - if (dest_id == -1) + + // Check if this matches the default range + if (dest_id == -1) { + for (iter = defaultRange.begin(); iter != defaultRange.end(); iter++) { + if (*iter == addr) { + DPRINTF(Bus, " found addr 0x%llx on default\n", addr); + return defaultPort; + } + } panic("Unable to find destination for addr: %llx", addr); + } + // we shouldn't be sending this back to where it came from assert(dest_id != id); @@ -155,39 +176,52 @@ Bus::recvFunctional(Packet *pkt) void Bus::recvStatusChange(Port::Status status, int id) { + AddrRangeList ranges; + AddrRangeList snoops; + int x; + AddrRangeIter iter; + assert(status == Port::RangeChange && "The other statuses need to be implemented."); DPRINTF(BusAddrRanges, "received RangeChange from device id %d\n", id); - assert(id < interfaces.size() && id >= 0); - int x; - Port *port = interfaces[id]; - AddrRangeList ranges; - AddrRangeList snoops; - AddrRangeIter iter; - std::vector<DevMap>::iterator portIter; + if (id == defaultId) { + defaultRange.clear(); + defaultPort->getPeerAddressRanges(ranges, snoops); + assert(snoops.size() == 0); + for(iter = ranges.begin(); iter != ranges.end(); iter++) { + defaultRange.push_back(*iter); + DPRINTF(BusAddrRanges, "Adding range %llx - %llx for default\n", + iter->start, iter->end); + } + } else { - // Clean out any previously existent ids - for (portIter = portList.begin(); portIter != portList.end(); ) { - if (portIter->portId == id) - portIter = portList.erase(portIter); - else - portIter++; - } + assert((id < interfaces.size() && id >= 0) || id == -1); + Port *port = interfaces[id]; + std::vector<DevMap>::iterator portIter; + + // Clean out any previously existent ids + for (portIter = portList.begin(); portIter != portList.end(); ) { + if (portIter->portId == id) + portIter = portList.erase(portIter); + else + portIter++; + } - port->getPeerAddressRanges(ranges, snoops); + port->getPeerAddressRanges(ranges, snoops); - // not dealing with snooping yet either - assert(snoops.size() == 0); - for(iter = ranges.begin(); iter != ranges.end(); iter++) { - DevMap dm; - dm.portId = id; - dm.range = *iter; + // not dealing with snooping yet either + assert(snoops.size() == 0); + for(iter = ranges.begin(); iter != ranges.end(); iter++) { + DevMap dm; + dm.portId = id; + dm.range = *iter; - DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n", - dm.range.start, dm.range.end, id); - portList.push_back(dm); + DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n", + dm.range.start, dm.range.end, id); + portList.push_back(dm); + } } DPRINTF(MMU, "port list has %d entries\n", portList.size()); @@ -196,19 +230,47 @@ Bus::recvStatusChange(Port::Status status, int id) for (x = 0; x < interfaces.size(); x++) if (x != id) interfaces[x]->sendStatusChange(Port::RangeChange); + + if (id != defaultId && defaultPort) + defaultPort->sendStatusChange(Port::RangeChange); } void Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) { std::vector<DevMap>::iterator portIter; + AddrRangeIter dflt_iter; + bool subset; resp.clear(); snoop.clear(); DPRINTF(BusAddrRanges, "received address range request, returning:\n"); + + for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); + dflt_iter++) { + resp.push_back(*dflt_iter); + DPRINTF(BusAddrRanges, " -- %#llX : %#llX\n",dflt_iter->start, + dflt_iter->end); + } for (portIter = portList.begin(); portIter != portList.end(); portIter++) { - if (portIter->portId != id) { + subset = false; + for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); + dflt_iter++) { + if ((portIter->range.start < dflt_iter->start && + portIter->range.end >= dflt_iter->start) || + (portIter->range.start < dflt_iter->end && + portIter->range.end >= dflt_iter->end)) + fatal("Devices can not set ranges that itersect the default set\ + but are not a subset of the default set.\n"); + if (portIter->range.start >= dflt_iter->start && + portIter->range.end <= dflt_iter->end) { + subset = true; + DPRINTF(BusAddrRanges, " -- %#llX : %#llX is a SUBSET\n", + portIter->range.start, portIter->range.end); + } + } + if (portIter->portId != id && !subset) { resp.push_back(portIter->range); DPRINTF(BusAddrRanges, " -- %#llX : %#llX\n", portIter->range.start, portIter->range.end); diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 50bfba6e4..3a2896886 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ron Dreslinski + * Ali Saidi */ /** @@ -50,19 +51,22 @@ class Bus : public MemObject /** a globally unique id for this bus. */ int busId; + static const int defaultId = -1; + struct DevMap { int portId; Range<Addr> range; }; std::vector<DevMap> portList; + AddrRangeList defaultRange; /** Function called by the port when the bus is recieving a Timing - transaction.*/ + transaction.*/ bool recvTiming(Packet *pkt); /** Function called by the port when the bus is recieving a Atomic - transaction.*/ + transaction.*/ Tick recvAtomic(Packet *pkt); /** Function called by the port when the bus is recieving a Functional @@ -158,15 +162,18 @@ class Bus : public MemObject * original send failed for whatever reason.*/ std::list<Port*> retryList; + /** Port that handles requests that don't match any of the interfaces.*/ + Port *defaultPort; + public: /** A function used to return the port associated with this bus object. */ - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); virtual void init(); Bus(const std::string &n, int bus_id) - : MemObject(n), busId(bus_id) {} + : MemObject(n), busId(bus_id), defaultPort(NULL) {} }; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc new file mode 100644 index 000000000..451da28e8 --- /dev/null +++ b/src/mem/cache/base_cache.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definition of BaseCache functions. + */ + +#include "mem/cache/base_cache.hh" +#include "cpu/smt.hh" +#include "cpu/base.hh" + +using namespace std; + +BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, + bool _isCpuSide) + : Port(_name), cache(_cache), isCpuSide(_isCpuSide) +{ + blocked = false; + //Start ports at null if more than one is created we should panic + //cpuSidePort = NULL; + //memSidePort = NULL; +} + +void +BaseCache::CachePort::recvStatusChange(Port::Status status) +{ + cache->recvStatusChange(status, isCpuSide); +} + +void +BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) +{ + cache->getAddressRanges(resp, snoop, isCpuSide); +} + +int +BaseCache::CachePort::deviceBlockSize() +{ + return cache->getBlockSize(); +} + +bool +BaseCache::CachePort::recvTiming(Packet *pkt) +{ + return cache->doTimingAccess(pkt, this, isCpuSide); +} + +Tick +BaseCache::CachePort::recvAtomic(Packet *pkt) +{ + return cache->doAtomicAccess(pkt, isCpuSide); +} + +void +BaseCache::CachePort::recvFunctional(Packet *pkt) +{ + cache->doFunctionalAccess(pkt, isCpuSide); +} + +void +BaseCache::CachePort::setBlocked() +{ + blocked = true; +} + +void +BaseCache::CachePort::clearBlocked() +{ + blocked = false; +} + +BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort) + : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort) +{ + this->setFlags(AutoDelete); + pkt = NULL; +} + +BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, Packet *_pkt) + : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort), pkt(_pkt) +{ + this->setFlags(AutoDelete); +} + +void +BaseCache::CacheEvent::process() +{ + if (!pkt) + { + if (!cachePort->isCpuSide) + { + pkt = cachePort->cache->getPacket(); + bool success = cachePort->sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? "succesful" : "unsuccesful"); + cachePort->cache->sendResult(pkt, success); + if (success && cachePort->cache->doMasterRequest()) + { + //Still more to issue, rerequest in 1 cycle + pkt = NULL; + this->schedule(curTick+1); + } + } + else + { + pkt = cachePort->cache->getCoherencePacket(); + cachePort->sendTiming(pkt); + } + return; + } + //Know the packet to send, no need to mark in service (must succed) + bool success = cachePort->sendTiming(pkt); + assert(success); +} + +const char * +BaseCache::CacheEvent::description() +{ + return "timing event\n"; +} + +Port* +BaseCache::getPort(const std::string &if_name, int idx) +{ + if (if_name == "") + { + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "functional") + { + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "cpu_side") + { + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "mem_side") + { + if (memSidePort != NULL) + panic("Already have a mem side for this cache\n"); + memSidePort = new CachePort(name() + "-mem_side_port", this, false); + return memSidePort; + } + else panic("Port name %s unrecognized\n", if_name); +} + +void +BaseCache::init() +{ + if (!cpuSidePort || !memSidePort) + panic("Cache not hooked up on both sides\n"); + cpuSidePort->sendStatusChange(Port::RangeChange); +} + +void +BaseCache::regStats() +{ + Request temp_req((Addr) NULL, 4, 0); + Packet::Command temp_cmd = Packet::ReadReq; + Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + temp_pkt.allocate(); //Temp allocate, all need data + + using namespace Stats; + + // Hit statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + hits[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_hits") + .desc("number of " + cstr + " hits") + .flags(total | nozero | nonan) + ; + } + + demandHits + .name(name() + ".demand_hits") + .desc("number of demand (read+write) hits") + .flags(total) + ; + demandHits = hits[Packet::ReadReq] + hits[Packet::WriteReq]; + + overallHits + .name(name() + ".overall_hits") + .desc("number of overall hits") + .flags(total) + ; + overallHits = demandHits + hits[Packet::SoftPFReq] + hits[Packet::HardPFReq] + + hits[Packet::Writeback]; + + // Miss statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + misses[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_misses") + .desc("number of " + cstr + " misses") + .flags(total | nozero | nonan) + ; + } + + demandMisses + .name(name() + ".demand_misses") + .desc("number of demand (read+write) misses") + .flags(total) + ; + demandMisses = misses[Packet::ReadReq] + misses[Packet::WriteReq]; + + overallMisses + .name(name() + ".overall_misses") + .desc("number of overall misses") + .flags(total) + ; + overallMisses = demandMisses + misses[Packet::SoftPFReq] + + misses[Packet::HardPFReq] + misses[Packet::Writeback]; + + // Miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + missLatency[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_miss_latency") + .desc("number of " + cstr + " miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMissLatency + .name(name() + ".demand_miss_latency") + .desc("number of demand (read+write) miss cycles") + .flags(total) + ; + demandMissLatency = missLatency[Packet::ReadReq] + missLatency[Packet::WriteReq]; + + overallMissLatency + .name(name() + ".overall_miss_latency") + .desc("number of overall miss cycles") + .flags(total) + ; + overallMissLatency = demandMissLatency + missLatency[Packet::SoftPFReq] + + missLatency[Packet::HardPFReq]; + + // access formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + accesses[access_idx] + .name(name() + "." + cstr + "_accesses") + .desc("number of " + cstr + " accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + + accesses[access_idx] = hits[access_idx] + misses[access_idx]; + } + + demandAccesses + .name(name() + ".demand_accesses") + .desc("number of demand (read+write) accesses") + .flags(total) + ; + demandAccesses = demandHits + demandMisses; + + overallAccesses + .name(name() + ".overall_accesses") + .desc("number of overall (read+write) accesses") + .flags(total) + ; + overallAccesses = overallHits + overallMisses; + + // miss rate formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + missRate[access_idx] + .name(name() + "." + cstr + "_miss_rate") + .desc("miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + missRate[access_idx] = misses[access_idx] / accesses[access_idx]; + } + + demandMissRate + .name(name() + ".demand_miss_rate") + .desc("miss rate for demand accesses") + .flags(total) + ; + demandMissRate = demandMisses / demandAccesses; + + overallMissRate + .name(name() + ".overall_miss_rate") + .desc("miss rate for overall accesses") + .flags(total) + ; + overallMissRate = overallMisses / overallAccesses; + + // miss latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + avgMissLatency[access_idx] + .name(name() + "." + cstr + "_avg_miss_latency") + .desc("average " + cstr + " miss latency") + .flags(total | nozero | nonan) + ; + + avgMissLatency[access_idx] = + missLatency[access_idx] / misses[access_idx]; + } + + demandAvgMissLatency + .name(name() + ".demand_avg_miss_latency") + .desc("average overall miss latency") + .flags(total) + ; + demandAvgMissLatency = demandMissLatency / demandMisses; + + overallAvgMissLatency + .name(name() + ".overall_avg_miss_latency") + .desc("average overall miss latency") + .flags(total) + ; + overallAvgMissLatency = overallMissLatency / overallMisses; + + blocked_cycles.init(NUM_BLOCKED_CAUSES); + blocked_cycles + .name(name() + ".blocked_cycles") + .desc("number of cycles access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + + blocked_causes.init(NUM_BLOCKED_CAUSES); + blocked_causes + .name(name() + ".blocked") + .desc("number of cycles access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + avg_blocked + .name(name() + ".avg_blocked_cycles") + .desc("average number of cycles each access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + avg_blocked = blocked_cycles / blocked_causes; + + fastWrites + .name(name() + ".fast_writes") + .desc("number of fast writes performed") + ; + + cacheCopies + .name(name() + ".cache_copies") + .desc("number of cache copies performed") + ; + +} diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh new file mode 100644 index 000000000..0d1bfdfdb --- /dev/null +++ b/src/mem/cache/base_cache.hh @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declares a basic cache interface BaseCache. + */ + +#ifndef __BASE_CACHE_HH__ +#define __BASE_CACHE_HH__ + +#include <vector> +#include <string> +#include <list> +#include <inttypes.h> + +#include "base/misc.hh" +#include "base/statistics.hh" +#include "base/trace.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "mem/request.hh" +#include "sim/eventq.hh" + +/** + * Reasons for Caches to be Blocked. + */ +enum BlockedCause{ + Blocked_NoMSHRs, + Blocked_NoTargets, + Blocked_NoWBBuffers, + Blocked_Coherence, + Blocked_Copy, + NUM_BLOCKED_CAUSES +}; + +/** + * Reasons for cache to request a bus. + */ +enum RequestCause{ + Request_MSHR, + Request_WB, + Request_Coherence, + Request_PF +}; + +/** + * A basic cache interface. Implements some common functions for speed. + */ +class BaseCache : public MemObject +{ + class CachePort : public Port + { + public: + BaseCache *cache; + + CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); + + protected: + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop); + + virtual int deviceBlockSize(); + + public: + void setBlocked(); + + void clearBlocked(); + + bool blocked; + + bool isCpuSide; + }; + + struct CacheEvent : public Event + { + CachePort *cachePort; + Packet *pkt; + + CacheEvent(CachePort *_cachePort); + CacheEvent(CachePort *_cachePort, Packet *_pkt); + void process(); + const char *description(); + }; + + protected: + CachePort *cpuSidePort; + CachePort *memSidePort; + + public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + + private: + //To be defined in cache_impl.hh not in base class + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide) + { + fatal("No implementation"); + } + + void recvStatusChange(Port::Status status, bool isCpuSide) + { + if (status == Port::RangeChange) + { + if (!isCpuSide) + { + cpuSidePort->sendStatusChange(Port::RangeChange); + } + else + { + memSidePort->sendStatusChange(Port::RangeChange); + } + } + } + + virtual Packet *getPacket() + { + fatal("No implementation"); + } + + virtual Packet *getCoherencePacket() + { + fatal("No implementation"); + } + + virtual void sendResult(Packet* &pkt, bool success) + { + + fatal("No implementation"); + } + + /** + * Bit vector of the blocking reasons for the access path. + * @sa #BlockedCause + */ + uint8_t blocked; + + /** + * Bit vector for the blocking reasons for the snoop path. + * @sa #BlockedCause + */ + uint8_t blockedSnoop; + + /** + * Bit vector for the outstanding requests for the master interface. + */ + uint8_t masterRequests; + + /** + * Bit vector for the outstanding requests for the slave interface. + */ + uint8_t slaveRequests; + + protected: + + /** True if this cache is connected to the CPU. */ + bool topLevelCache; + + /** Stores time the cache blocked for statistics. */ + Tick blockedCycle; + + /** Block size of this cache */ + const int blkSize; + + /** The number of misses to trigger an exit event. */ + Counter missCount; + + public: + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + + /** Number of hits per thread for each type of command. @sa Packet::Command */ + Stats::Vector<> hits[NUM_MEM_CMDS]; + /** Number of hits for demand accesses. */ + Stats::Formula demandHits; + /** Number of hit for all accesses. */ + Stats::Formula overallHits; + + /** Number of misses per thread for each type of command. @sa Packet::Command */ + Stats::Vector<> misses[NUM_MEM_CMDS]; + /** Number of misses for demand accesses. */ + Stats::Formula demandMisses; + /** Number of misses for all accesses. */ + Stats::Formula overallMisses; + + /** + * Total number of cycles per thread/command spent waiting for a miss. + * Used to calculate the average miss latency. + */ + Stats::Vector<> missLatency[NUM_MEM_CMDS]; + /** Total number of cycles spent waiting for demand misses. */ + Stats::Formula demandMissLatency; + /** Total number of cycles spent waiting for all misses. */ + Stats::Formula overallMissLatency; + + /** The number of accesses per command and thread. */ + Stats::Formula accesses[NUM_MEM_CMDS]; + /** The number of demand accesses. */ + Stats::Formula demandAccesses; + /** The number of overall accesses. */ + Stats::Formula overallAccesses; + + /** The miss rate per command and thread. */ + Stats::Formula missRate[NUM_MEM_CMDS]; + /** The miss rate of all demand accesses. */ + Stats::Formula demandMissRate; + /** The miss rate for all accesses. */ + Stats::Formula overallMissRate; + + /** The average miss latency per command and thread. */ + Stats::Formula avgMissLatency[NUM_MEM_CMDS]; + /** The average miss latency for demand misses. */ + Stats::Formula demandAvgMissLatency; + /** The average miss latency for all misses. */ + Stats::Formula overallAvgMissLatency; + + /** The total number of cycles blocked for each blocked cause. */ + Stats::Vector<> blocked_cycles; + /** The number of times this cache blocked for each blocked cause. */ + Stats::Vector<> blocked_causes; + + /** The average number of cycles blocked for each blocked cause. */ + Stats::Formula avg_blocked; + + /** The number of fast writes (WH64) performed. */ + Stats::Scalar<> fastWrites; + + /** The number of cache copies performed. */ + Stats::Scalar<> cacheCopies; + + /** + * @} + */ + + /** + * Register stats for this object. + */ + virtual void regStats(); + + public: + + class Params + { + public: + /** List of address ranges of this cache. */ + std::vector<Range<Addr> > addrRange; + /** The hit latency for this cache. */ + int hitLatency; + /** The block size of this cache. */ + int blkSize; + /** + * The maximum number of misses this cache should handle before + * ending the simulation. + */ + Counter maxMisses; + + /** + * Construct an instance of this parameter class. + */ + Params(std::vector<Range<Addr> > addr_range, + int hit_latency, int _blkSize, Counter max_misses) + : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize), + maxMisses(max_misses) + { + } + }; + + /** + * Create and initialize a basic cache object. + * @param name The name of this cache. + * @param hier_params Pointer to the HierParams object for this hierarchy + * of this cache. + * @param params The parameter object for this BaseCache. + */ + BaseCache(const std::string &name, Params ¶ms) + : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0), + slaveRequests(0), topLevelCache(false), blkSize(params.blkSize), + missCount(params.maxMisses) + { + //Start ports at null if more than one is created we should panic + cpuSidePort = NULL; + memSidePort = NULL; + } + + virtual void init(); + + /** + * Query block size of a cache. + * @return The block size + */ + int getBlockSize() const + { + return blkSize; + } + + /** + * Returns true if this cache is connect to the CPU. + * @return True if this is a L1 cache. + */ + bool isTopLevel() + { + return topLevelCache; + } + + /** + * Returns true if the cache is blocked for accesses. + */ + bool isBlocked() + { + return blocked != 0; + } + + /** + * Returns true if the cache is blocked for snoops. + */ + bool isBlockedForSnoop() + { + return blockedSnoop != 0; + } + + /** + * Marks the access path of the cache as blocked for the given cause. This + * also sets the blocked flag in the slave interface. + * @param cause The reason for the cache blocking. + */ + void setBlocked(BlockedCause cause) + { + uint8_t flag = 1 << cause; + if (blocked == 0) { + blocked_causes[cause]++; + blockedCycle = curTick; + } + blocked |= flag; + DPRINTF(Cache,"Blocking for cause %s\n", cause); + cpuSidePort->setBlocked(); + } + + /** + * Marks the snoop path of the cache as blocked for the given cause. This + * also sets the blocked flag in the master interface. + * @param cause The reason to block the snoop path. + */ + void setBlockedForSnoop(BlockedCause cause) + { + uint8_t flag = 1 << cause; + blockedSnoop |= flag; + memSidePort->setBlocked(); + } + + /** + * Marks the cache as unblocked for the given cause. This also clears the + * blocked flags in the appropriate interfaces. + * @param cause The newly unblocked cause. + * @warning Calling this function can cause a blocked request on the bus to + * access the cache. The cache must be in a state to handle that request. + */ + void clearBlocked(BlockedCause cause) + { + uint8_t flag = 1 << cause; + blocked &= ~flag; + blockedSnoop &= ~flag; + DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n", + cause, blocked); + if (!isBlocked()) { + blocked_cycles[cause] += curTick - blockedCycle; + DPRINTF(Cache,"Unblocking from all causes\n"); + cpuSidePort->clearBlocked(); + } + if (!isBlockedForSnoop()) { + memSidePort->clearBlocked(); + } + } + + /** + * True if the master bus should be requested. + * @return True if there are outstanding requests for the master bus. + */ + bool doMasterRequest() + { + return masterRequests != 0; + } + + /** + * Request the master bus for the given cause and time. + * @param cause The reason for the request. + * @param time The time to make the request. + */ + void setMasterRequest(RequestCause cause, Tick time) + { + if (!doMasterRequest()) + { + BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort); + reqCpu->schedule(time); + } + uint8_t flag = 1<<cause; + masterRequests |= flag; + } + + /** + * Clear the master bus request for the given cause. + * @param cause The request reason to clear. + */ + void clearMasterRequest(RequestCause cause) + { + uint8_t flag = 1<<cause; + masterRequests &= ~flag; + } + + /** + * Return true if the slave bus should be requested. + * @return True if there are outstanding requests for the slave bus. + */ + bool doSlaveRequest() + { + return slaveRequests != 0; + } + + /** + * Request the slave bus for the given reason and time. + * @param cause The reason for the request. + * @param time The time to make the request. + */ + void setSlaveRequest(RequestCause cause, Tick time) + { + uint8_t flag = 1<<cause; + slaveRequests |= flag; + assert("Implement\n" && 0); +// si->pktuest(time); + } + + /** + * Clear the slave bus request for the given reason. + * @param cause The request reason to clear. + */ + void clearSlaveRequest(RequestCause cause) + { + uint8_t flag = 1<<cause; + slaveRequests &= ~flag; + } + + /** + * Send a response to the slave interface. + * @param req The request being responded to. + * @param time The time the response is ready. + */ + void respond(Packet *pkt, Tick time) + { + pkt->makeTimingResponse(); + pkt->result = Packet::Success; + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); + } + + /** + * Send a reponse to the slave interface and calculate miss latency. + * @param req The request to respond to. + * @param time The time the response is ready. + */ + void respondToMiss(Packet *pkt, Tick time) + { + if (!pkt->req->isUncacheable()) { + missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; + } + pkt->makeTimingResponse(); + pkt->result = Packet::Success; + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); + } + + /** + * Suppliess the data if cache to cache transfers are enabled. + * @param req The bus transaction to fulfill. + */ + void respondToSnoop(Packet *pkt) + { + assert("Implement\n" && 0); +// mi->respond(pkt,curTick + hitLatency); + } + + /** + * Notification from master interface that a address range changed. Nothing + * to do for a cache. + */ + void rangeChange() {} + + void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide) + { + if (isCpuSide) + { + AddrRangeList dummy; + memSidePort->getPeerAddressRanges(resp, dummy); + } + else + { + //This is where snoops get updated + return; + } + } +}; + +#endif //__BASE_CACHE_HH__ diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc new file mode 100644 index 000000000..db66c096e --- /dev/null +++ b/src/mem/cache/cache.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Lisa Hsu + * Kevin Lim + */ + +/** + * @file + * Cache template instantiations. + */ + +#include "mem/config/cache.hh" +#include "mem/config/compression.hh" + +#include "mem/cache/tags/cache_tags.hh" + +#if defined(USE_CACHE_LRU) +#include "mem/cache/tags/lru.hh" +#endif + +#if defined(USE_CACHE_FALRU) +#include "mem/cache/tags/fa_lru.hh" +#endif + +#if defined(USE_CACHE_IIC) +#include "mem/cache/tags/iic.hh" +#endif + +#if defined(USE_CACHE_SPLIT) +#include "mem/cache/tags/split.hh" +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#include "mem/cache/tags/split_lifo.hh" +#endif + +#include "base/compression/null_compression.hh" +#if defined(USE_LZSS_COMPRESSION) +#include "base/compression/lzss_compression.hh" +#endif + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/coherence/simple_coherence.hh" + +#include "mem/cache/cache_impl.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + + +#if defined(USE_CACHE_FALRU) +template class Cache<CacheTags<FALRU,NullCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<FALRU,NullCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<FALRU,NullCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<FALRU,NullCompression>, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache<CacheTags<FALRU,LZSSCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<FALRU,LZSSCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<FALRU,LZSSCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<FALRU,LZSSCompression>, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_IIC) +template class Cache<CacheTags<IIC,NullCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<IIC,NullCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<IIC,NullCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<IIC,NullCompression>, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache<CacheTags<IIC,LZSSCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<IIC,LZSSCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<IIC,LZSSCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<IIC,LZSSCompression>, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_LRU) +template class Cache<CacheTags<LRU,NullCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<LRU,NullCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<LRU,NullCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<LRU,NullCompression>, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache<CacheTags<LRU,LZSSCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<LRU,LZSSCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<LRU,LZSSCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<LRU,LZSSCompression>, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_SPLIT) +template class Cache<CacheTags<Split,NullCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<Split,NullCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<Split,NullCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<Split,NullCompression>, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache<CacheTags<Split,LZSSCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<Split,LZSSCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<Split,LZSSCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<Split,LZSSCompression>, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +template class Cache<CacheTags<SplitLIFO,NullCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<SplitLIFO,NullCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<SplitLIFO,NullCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<SplitLIFO,NullCompression>, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache<CacheTags<SplitLIFO,LZSSCompression>, BlockingBuffer, SimpleCoherence>; +template class Cache<CacheTags<SplitLIFO,LZSSCompression>, BlockingBuffer, UniCoherence>; +template class Cache<CacheTags<SplitLIFO,LZSSCompression>, MissQueue, SimpleCoherence>; +template class Cache<CacheTags<SplitLIFO,LZSSCompression>, MissQueue, UniCoherence>; +#endif +#endif + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh new file mode 100644 index 000000000..ec5b800a8 --- /dev/null +++ b/src/mem/cache/cache.hh @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + * Steve Reinhardt + */ + +/** + * @file + * Describes a cache based on template policies. + */ + +#ifndef __CACHE_HH__ +#define __CACHE_HH__ + +#include "base/misc.hh" // fatal, panic, and warn +#include "cpu/smt.hh" // SMT_MAX_THREADS + +#include "mem/cache/base_cache.hh" +#include "mem/cache/prefetch/prefetcher.hh" + +//Forward decleration +class MSHR; + + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template <class TagStore, class Buffering, class Coherence> +class Cache : public BaseCache +{ + public: + /** Define the type of cache block to use. */ + typedef typename TagStore::BlkType BlkType; + + bool prefetchAccess; + protected: + + /** Tag and data Storage */ + TagStore *tags; + /** Miss and Writeback handler */ + Buffering *missQueue; + /** Coherence protocol. */ + Coherence *coherence; + + /** Prefetcher */ + Prefetcher<TagStore, Buffering> *prefetcher; + + /** Do fast copies in this cache. */ + bool doCopy; + + /** Block on a delayed copy. */ + bool blockOnCopy; + + /** + * The clock ratio of the outgoing bus. + * Used for calculating critical word first. + */ + int busRatio; + + /** + * The bus width in bytes of the outgoing bus. + * Used for calculating critical word first. + */ + int busWidth; + + /** + * The latency of a hit in this device. + */ + int hitLatency; + + /** + * A permanent mem req to always be used to cause invalidations. + * Used to append to target list, to cause an invalidation. + */ + Packet * invalidatePkt; + + /** + * Temporarily move a block into a MSHR. + * @todo Remove this when LSQ/SB are fixed and implemented in memtest. + */ + void pseudoFill(Addr addr, int asid); + + /** + * Temporarily move a block into an existing MSHR. + * @todo Remove this when LSQ/SB are fixed and implemented in memtest. + */ + void pseudoFill(MSHR *mshr); + + public: + + class Params + { + public: + TagStore *tags; + Buffering *missQueue; + Coherence *coherence; + bool doCopy; + bool blockOnCopy; + BaseCache::Params baseParams; + Prefetcher<TagStore, Buffering> *prefetcher; + bool prefetchAccess; + int hitLatency; + + Params(TagStore *_tags, Buffering *mq, Coherence *coh, + bool do_copy, BaseCache::Params params, + Prefetcher<TagStore, Buffering> *_prefetcher, + bool prefetch_access, int hit_latency) + : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy), + blockOnCopy(false), baseParams(params), + prefetcher(_prefetcher), prefetchAccess(prefetch_access), + hitLatency(hit_latency) + { + } + }; + + /** Instantiates a basic cache object. */ + Cache(const std::string &_name, Params ¶ms); + + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); + + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); + + virtual void recvStatusChange(Port::Status status, bool isCpuSide); + + void regStats(); + + /** + * Performs the access specified by the request. + * @param req The request to perform. + * @return The result of the access. + */ + bool access(Packet * &pkt); + + /** + * Selects a request to send on the bus. + * @return The memory request to service. + */ + virtual Packet * getPacket(); + + /** + * Was the request was sent successfully? + * @param req The request. + * @param success True if the request was sent successfully. + */ + virtual void sendResult(Packet * &pkt, bool success); + + /** + * Handles a response (cache line fill/write ack) from the bus. + * @param req The request being responded to. + */ + void handleResponse(Packet * &pkt); + + /** + * Start handling a copy transaction. + * @param req The copy request to perform. + */ + void startCopy(Packet * &pkt); + + /** + * Handle a delayed copy transaction. + * @param req The delayed copy request to continue. + * @param addr The address being responded to. + * @param blk The block of the current response. + * @param mshr The mshr being handled. + */ + void handleCopy(Packet * &pkt, Addr addr, BlkType *blk, MSHR *mshr); + + /** + * Selects a coherence message to forward to lower levels of the hierarchy. + * @return The coherence message to forward. + */ + virtual Packet * getCoherencePacket(); + + /** + * Snoops bus transactions to maintain coherence. + * @param req The current bus transaction. + */ + void snoop(Packet * &pkt); + + void snoopResponse(Packet * &pkt); + + /** + * Invalidates the block containing address if found. + * @param addr The address to look for. + * @param asid The address space ID of the address. + * @todo Is this function necessary? + */ + void invalidateBlk(Addr addr, int asid); + + /** + * Aquash all requests associated with specified thread. + * intended for use by I-cache. + * @param req->getThreadNum()ber The thread to squash. + */ + void squash(int threadNum) + { + missQueue->squash(threadNum); + } + + /** + * Return the number of outstanding misses in a Cache. + * Default returns 0. + * + * @retval unsigned The number of missing still outstanding. + */ + unsigned outstandingMisses() const + { + return missQueue->getMisses(); + } + + /** + * Perform the access specified in the request and return the estimated + * time of completion. This function can either update the hierarchy state + * or just perform the access wherever the data is found depending on the + * state of the update flag. + * @param req The memory request to satisfy + * @param update If true, update the hierarchy, otherwise just perform the + * request. + * @return The estimated completion time. + */ + Tick probe(Packet * &pkt, bool update); + + /** + * Snoop for the provided request in the cache and return the estimated + * time of completion. + * @todo Can a snoop probe not change state? + * @param req The memory request to satisfy + * @param update If true, update the hierarchy, otherwise just perform the + * request. + * @return The estimated completion time. + */ + Tick snoopProbe(Packet * &pkt, bool update); +}; + +#endif // __CACHE_HH__ diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh new file mode 100644 index 000000000..67e65d25b --- /dev/null +++ b/src/mem/cache/cache_blk.hh @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** @file + * Definitions of a simple cache block class. + */ + +#ifndef __CACHE_BLK_HH__ +#define __CACHE_BLK_HH__ + +#include "sim/root.hh" // for Tick +#include "arch/isa_traits.hh" // for Addr + +/** + * Cache block status bit assignments + */ +enum CacheBlkStatusBits { + /** valid, readable */ + BlkValid = 0x01, + /** write permission */ + BlkWritable = 0x02, + /** dirty (modified) */ + BlkDirty = 0x04, + /** compressed */ + BlkCompressed = 0x08, + /** block was referenced */ + BlkReferenced = 0x10, + /** block was a hardware prefetch yet unaccessed*/ + BlkHWPrefetched = 0x20 +}; + +/** + * A Basic Cache block. + * Contains the tag, status, and a pointer to data. + */ +class CacheBlk +{ + public: + /** The address space ID of this block. */ + int asid; + /** Data block tag value. */ + Addr tag; + /** + * Contains a copy of the data in this block for easy access. This is used + * for efficient execution when the data could be actually stored in + * another format (COW, compressed, sub-blocked, etc). In all cases the + * data stored here should be kept consistant with the actual data + * referenced by this block. + */ + uint8_t *data; + /** the number of bytes stored in this block. */ + int size; + + /** block state: OR of CacheBlkStatusBit */ + typedef unsigned State; + + /** The current status of this block. @sa CacheBlockStatusBits */ + State status; + + /** Which curTick will this block be accessable */ + Tick whenReady; + + /** + * The set this block belongs to. + * @todo Move this into subclasses when we fix CacheTags to use them. + */ + int set; + + /** Number of references to this block since it was brought in. */ + int refCount; + + CacheBlk() + : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), + set(-1), refCount(0) + {} + + /** + * Copy the state of the given block into this one. + * @param rhs The block to copy. + * @return a const reference to this block. + */ + const CacheBlk& operator=(const CacheBlk& rhs) + { + asid = rhs.asid; + tag = rhs.tag; + data = rhs.data; + size = rhs.size; + status = rhs.status; + whenReady = rhs.whenReady; + set = rhs.set; + refCount = rhs.refCount; + return *this; + } + + /** + * Checks the write permissions of this block. + * @return True if the block is writable. + */ + bool isWritable() const + { + const int needed_bits = BlkWritable | BlkValid; + return (status & needed_bits) == needed_bits; + } + + /** + * Checks that a block is valid (readable). + * @return True if the block is valid. + */ + bool isValid() const + { + return (status & BlkValid) != 0; + } + + /** + * Check to see if a block has been written. + * @return True if the block is dirty. + */ + bool isModified() const + { + return (status & BlkDirty) != 0; + } + + /** + * Check to see if this block contains compressed data. + * @return True iF the block's data is compressed. + */ + bool isCompressed() const + { + return (status & BlkCompressed) != 0; + } + + /** + * Check if this block has been referenced. + * @return True if the block has been referenced. + */ + bool isReferenced() const + { + return (status & BlkReferenced) != 0; + } + + /** + * Check if this block was the result of a hardware prefetch, yet to + * be touched. + * @return True if the block was a hardware prefetch, unaccesed. + */ + bool isPrefetch() const + { + return (status & BlkHWPrefetched) != 0; + } + + +}; + +/** + * Output a CacheBlk to the given ostream. + * @param out The stream for the output. + * @param blk The cache block to print. + * + * @return The output stream. + */ +inline std::ostream & +operator<<(std::ostream &out, const CacheBlk &blk) +{ + out << std::hex << std::endl; + out << " Tag: " << blk.tag << std::endl; + out << " Status: " << blk.status << std::endl; + + return(out << std::dec); +} + +#endif //__CACHE_BLK_HH__ diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc new file mode 100644 index 000000000..05a149a1c --- /dev/null +++ b/src/mem/cache/cache_builder.cc @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Nathan Binkert + */ + +/** + * @file + * Simobject instatiation of caches. + */ +#include <vector> + +// Must be included first to determine which caches we want +#include "mem/config/cache.hh" +#include "mem/config/compression.hh" +#include "mem/config/prefetch.hh" + +#include "mem/cache/base_cache.hh" +#include "mem/cache/cache.hh" +#include "mem/bus.hh" +#include "mem/cache/coherence/coherence_protocol.hh" +#include "sim/builder.hh" + +// Tag Templates +#if defined(USE_CACHE_LRU) +#include "mem/cache/tags/lru.hh" +#endif + +#if defined(USE_CACHE_FALRU) +#include "mem/cache/tags/fa_lru.hh" +#endif + +#if defined(USE_CACHE_IIC) +#include "mem/cache/tags/iic.hh" +#endif + +#if defined(USE_CACHE_SPLIT) +#include "mem/cache/tags/split.hh" +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#include "mem/cache/tags/split_lifo.hh" +#endif + +// Compression Templates +#include "base/compression/null_compression.hh" +#if defined(USE_LZSS_COMPRESSION) +#include "base/compression/lzss_compression.hh" +#endif + +// CacheTags Templates +#include "mem/cache/tags/cache_tags.hh" + +// MissQueue Templates +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +// Coherence Templates +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/coherence/simple_coherence.hh" + +//Prefetcher Headers +#if defined(USE_GHB) +#include "mem/cache/prefetch/ghb_prefetcher.hh" +#endif +#if defined(USE_TAGGED) +#include "mem/cache/prefetch/tagged_prefetcher.hh" +#endif +#if defined(USE_STRIDED) +#include "mem/cache/prefetch/stride_prefetcher.hh" +#endif + + +using namespace std; +using namespace TheISA; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) + + Param<int> size; + Param<int> assoc; + Param<int> block_size; + Param<int> latency; + Param<int> mshrs; + Param<int> tgts_per_mshr; + Param<int> write_buffers; + Param<bool> prioritizeRequests; +// SimObjectParam<Bus *> in_bus; +// SimObjectParam<Bus *> out_bus; + Param<bool> do_copy; + SimObjectParam<CoherenceProtocol *> protocol; + Param<Addr> trace_addr; + Param<int> hash_delay; +#if defined(USE_CACHE_IIC) + SimObjectParam<Repl *> repl; +#endif + Param<bool> compressed_bus; + Param<bool> store_compressed; + Param<bool> adaptive_compression; + Param<int> compression_latency; + Param<int> subblock_size; + Param<Counter> max_miss_count; +// SimObjectParam<HierParams *> hier; + VectorParam<Range<Addr> > addr_range; +// SimObjectParam<MemTraceWriter *> mem_trace; + Param<bool> split; + Param<int> split_size; + Param<bool> lifo; + Param<bool> two_queue; + Param<bool> prefetch_miss; + Param<bool> prefetch_access; + Param<int> prefetcher_size; + Param<bool> prefetch_past_page; + Param<bool> prefetch_serial_squash; + Param<Tick> prefetch_latency; + Param<int> prefetch_degree; + Param<string> prefetch_policy; + Param<bool> prefetch_cache_check_push; + Param<bool> prefetch_use_cpu_id; + Param<bool> prefetch_data_accesses_only; + Param<int> hit_latency; + +END_DECLARE_SIM_OBJECT_PARAMS(BaseCache) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) + + INIT_PARAM(size, "capacity in bytes"), + INIT_PARAM(assoc, "associativity"), + INIT_PARAM(block_size, "block size in bytes"), + INIT_PARAM(latency, "hit latency in CPU cycles"), + INIT_PARAM(mshrs, "number of MSHRs (max outstanding requests)"), + INIT_PARAM(tgts_per_mshr, "max number of accesses per MSHR"), + INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8), + INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first", + false), +/* INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), + INIT_PARAM(out_bus, "outgoing bus object"), +*/ + INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false), + INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL), + INIT_PARAM_DFLT(trace_addr, "address to trace", 0), + + INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1), +#if defined(USE_CACHE_IIC) + INIT_PARAM_DFLT(repl, "replacement policy",NULL), +#endif + INIT_PARAM_DFLT(compressed_bus, + "This cache connects to a compressed memory", + false), + INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache", + false), + INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme", + false), + INIT_PARAM_DFLT(compression_latency, + "Latency in cycles of compression algorithm", + 0), + INIT_PARAM_DFLT(subblock_size, + "Size of subblock in IIC used for compression", + 0), + INIT_PARAM_DFLT(max_miss_count, + "The number of misses to handle before calling exit", + 0), +/* INIT_PARAM_DFLT(hier, + "Hierarchy global variables", + &defaultHierParams), +*/ + INIT_PARAM_DFLT(addr_range, "The address range in bytes", + vector<Range<Addr> >(1,RangeIn((Addr)0, MaxAddr))), +// INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), + INIT_PARAM_DFLT(split, "Whether this is a partitioned cache", false), + INIT_PARAM_DFLT(split_size, "the number of \"ways\" belonging to the LRU partition", 0), + INIT_PARAM_DFLT(lifo, "whether you are using a LIFO repl. policy", false), + INIT_PARAM_DFLT(two_queue, "whether the lifo should have two queue replacement", false), + INIT_PARAM_DFLT(prefetch_miss, "wheter you are using the hardware prefetcher from Miss stream", false), + INIT_PARAM_DFLT(prefetch_access, "wheter you are using the hardware prefetcher from Access stream", false), + INIT_PARAM_DFLT(prefetcher_size, "Number of entries in the harware prefetch queue", 100), + INIT_PARAM_DFLT(prefetch_past_page, "Allow prefetches to cross virtual page boundaries", false), + INIT_PARAM_DFLT(prefetch_serial_squash, "Squash prefetches with a later time on a subsequent miss", false), + INIT_PARAM_DFLT(prefetch_latency, "Latency of the prefetcher", 10), + INIT_PARAM_DFLT(prefetch_degree, "Degree of the prefetch depth", 1), + INIT_PARAM_DFLT(prefetch_policy, "Type of prefetcher to use", "none"), + INIT_PARAM_DFLT(prefetch_cache_check_push, "Check if in cash on push or pop of prefetch queue", true), + INIT_PARAM_DFLT(prefetch_use_cpu_id, "Use the CPU ID to seperate calculations of prefetches", true), + INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false), + INIT_PARAM_DFLT(hit_latency, "Hit Latecny for a succesful access", 1) +END_INIT_SIM_OBJECT_PARAMS(BaseCache) + + +#define BUILD_CACHE(t, comp, b, c) do { \ + Prefetcher<CacheTags<t, comp>, b> *pf; \ + if (pf_policy == "tagged") { \ + BUILD_TAGGED_PREFETCHER(t, comp, b); \ + } \ + else if (pf_policy == "stride") { \ + BUILD_STRIDED_PREFETCHER(t, comp, b); \ + } \ + else if (pf_policy == "ghb") { \ + BUILD_GHB_PREFETCHER(t, comp, b); \ + } \ + else { \ + BUILD_NULL_PREFETCHER(t, comp, b); \ + } \ + Cache<CacheTags<t, comp>, b, c>::Params params(tagStore, mq, coh, \ + do_copy, base_params, \ + /*in_bus, out_bus,*/ pf, \ + prefetch_access, hit_latency); \ + Cache<CacheTags<t, comp>, b, c> *retval = \ + new Cache<CacheTags<t, comp>, b, c>(getInstanceName(), /*hier,*/ \ + params); \ +/* if (in_bus == NULL) { \ + retval->setSlaveInterface(new MemoryInterface<Cache<CacheTags<t, comp>, b, c> >(getInstanceName(), hier, retval, mem_trace)); \ + } else { \ + retval->setSlaveInterface(new SlaveInterface<Cache<CacheTags<t, comp>, b, c>, Bus>(getInstanceName(), hier, retval, in_bus, mem_trace)); \ + } \ + retval->setMasterInterface(new MasterInterface<Cache<CacheTags<t, comp>, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ + out_bus->rangeChange(); \ + return retval; \ +*/return retval; \ + } while (0) + +#define BUILD_CACHE_PANIC(x) do { \ + panic("%s not compiled into M5", x); \ + } while (0) + +#if defined(USE_LZSS_COMPRESSION) +#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \ + if (compressed_bus || store_compressed){ \ + CacheTags<TAGS, LZSSCompression> *tagStore = \ + new CacheTags<TAGS, LZSSCompression>(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression, \ + prefetch_miss); \ + BUILD_CACHE(TAGS, LZSSCompression, b, c); \ + } else { \ + CacheTags<TAGS, NullCompression> *tagStore = \ + new CacheTags<TAGS, NullCompression>(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression, \ + prefetch_miss); \ + BUILD_CACHE(TAGS, NullCompression, b, c); \ + } \ + } while (0) +#else +#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \ + if (compressed_bus || store_compressed){ \ + BUILD_CACHE_PANIC("compressed caches"); \ + } else { \ + CacheTags<TAGS, NullCompression> *tagStore = \ + new CacheTags<TAGS, NullCompression>(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression \ + prefetch_miss); \ + BUILD_CACHE(TAGS, NullCompression, b, c); \ + } \ + } while (0) +#endif + +#if defined(USE_CACHE_FALRU) +#define BUILD_FALRU_CACHE(b,c) do { \ + FALRU *tags = new FALRU(block_size, size, latency); \ + BUILD_COMPRESSED_CACHE(FALRU, tags, b, c); \ + } while (0) +#else +#define BUILD_FALRU_CACHE(b, c) BUILD_CACHE_PANIC("falru cache") +#endif + +#if defined(USE_CACHE_LRU) +#define BUILD_LRU_CACHE(b, c) do { \ + LRU *tags = new LRU(numSets, block_size, assoc, latency); \ + BUILD_COMPRESSED_CACHE(LRU, tags, b, c); \ + } while (0) +#else +#define BUILD_LRU_CACHE(b, c) BUILD_CACHE_PANIC("lru cache") +#endif + +#if defined(USE_CACHE_SPLIT) +#define BUILD_SPLIT_CACHE(b, c) do { \ + Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \ + two_queue, latency); \ + BUILD_COMPRESSED_CACHE(Split, tags, b, c); \ + } while (0) +#else +#define BUILD_SPLIT_CACHE(b, c) BUILD_CACHE_PANIC("split cache") +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#define BUILD_SPLIT_LIFO_CACHE(b, c) do { \ + SplitLIFO *tags = new SplitLIFO(block_size, size, assoc, \ + latency, two_queue, -1); \ + BUILD_COMPRESSED_CACHE(SplitLIFO, tags, b, c); \ + } while (0) +#else +#define BUILD_SPLIT_LIFO_CACHE(b, c) BUILD_CACHE_PANIC("lifo cache") +#endif + +#if defined(USE_CACHE_IIC) +#define BUILD_IIC_CACHE(b ,c) do { \ + IIC *tags = new IIC(iic_params); \ + BUILD_COMPRESSED_CACHE(IIC, tags, b, c); \ + } while (0) +#else +#define BUILD_IIC_CACHE(b, c) BUILD_CACHE_PANIC("iic") +#endif + +#define BUILD_CACHES(b, c) do { \ + if (repl == NULL) { \ + if (numSets == 1) { \ + BUILD_FALRU_CACHE(b, c); \ + } else { \ + if (split == true) { \ + BUILD_SPLIT_CACHE(b, c); \ + } else if (lifo == true) { \ + BUILD_SPLIT_LIFO_CACHE(b, c); \ + } else { \ + BUILD_LRU_CACHE(b, c); \ + } \ + } \ + } else { \ + BUILD_IIC_CACHE(b, c); \ + } \ + } while (0) + +#define BUILD_COHERENCE(b) do { \ + if (protocol == NULL) { \ + UniCoherence *coh = new UniCoherence(); \ + BUILD_CACHES(b, UniCoherence); \ + } else { \ + SimpleCoherence *coh = new SimpleCoherence(protocol); \ + BUILD_CACHES(b, SimpleCoherence); \ + } \ + } while (0) + +#if defined(USE_TAGGED) +#define BUILD_TAGGED_PREFETCHER(t, comp, b) pf = new \ + TaggedPrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree) +#else +#define BUILD_TAGGED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Tagged Prefetcher") +#endif + +#if defined(USE_STRIDED) +#define BUILD_STRIDED_PREFETCHER(t, comp, b) pf = new \ + StridePrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree, \ + prefetch_use_cpu_id) +#else +#define BUILD_STRIDED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Stride Prefetcher") +#endif + +#if defined(USE_GHB) +#define BUILD_GHB_PREFETCHER(t, comp, b) pf = new \ + GHBPrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree, \ + prefetch_use_cpu_id) +#else +#define BUILD_GHB_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("GHB Prefetcher") +#endif + +#if defined(USE_TAGGED) +#define BUILD_NULL_PREFETCHER(t, comp, b) pf = new \ + TaggedPrefetcher<CacheTags<t, comp>, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree) +#else +#define BUILD_NULL_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)") +#endif + +CREATE_SIM_OBJECT(BaseCache) +{ + string name = getInstanceName(); + int numSets = size / (assoc * block_size); + string pf_policy = prefetch_policy; + if (subblock_size == 0) { + subblock_size = block_size; + } + + // Build BaseCache param object + BaseCache::Params base_params(addr_range, latency, + block_size, max_miss_count); + + //Warnings about prefetcher policy + if (pf_policy == "none" && (prefetch_miss || prefetch_access)) { + panic("With no prefetcher, you shouldn't prefetch from" + " either miss or access stream\n"); + } + if ((pf_policy == "tagged" || pf_policy == "stride" || + pf_policy == "ghb") && !(prefetch_miss || prefetch_access)) { + warn("With this prefetcher you should chose a prefetch" + " stream (miss or access)\nNo Prefetching will occur\n"); + } + if ((pf_policy == "tagged" || pf_policy == "stride" || + pf_policy == "ghb") && prefetch_miss && prefetch_access) { + panic("Can't do prefetches from both miss and access" + " stream\n"); + } + if (pf_policy != "tagged" && pf_policy != "stride" && + pf_policy != "ghb" && pf_policy != "none") { + panic("Unrecognized form of a prefetcher: %s, try using" + "['none','stride','tagged','ghb']\n", pf_policy); + } + +#if defined(USE_CACHE_IIC) + // Build IIC params + IIC::Params iic_params; + iic_params.size = size; + iic_params.numSets = numSets; + iic_params.blkSize = block_size; + iic_params.assoc = assoc; + iic_params.hashDelay = hash_delay; + iic_params.hitLatency = latency; + iic_params.rp = repl; + iic_params.subblockSize = subblock_size; +#else + const void *repl = NULL; +#endif + + if (mshrs == 1 /*|| out_bus->doEvents() == false*/) { + BlockingBuffer *mq = new BlockingBuffer(true); + BUILD_COHERENCE(BlockingBuffer); + } else { + MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers, + true, prefetch_miss); + BUILD_COHERENCE(MissQueue); + } + return NULL; +} + +REGISTER_SIM_OBJECT("BaseCache", BaseCache) + + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh new file mode 100644 index 000000000..a447ae3d5 --- /dev/null +++ b/src/mem/cache/cache_impl.hh @@ -0,0 +1,660 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + * Nathan Binkert + */ + +/** + * @file + * Cache definitions. + */ + +#include <assert.h> +#include <math.h> + +#include <cassert> +#include <iostream> +#include <string> + +#include "sim/host.hh" +#include "base/misc.hh" +#include "cpu/smt.hh" + +#include "mem/cache/cache.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/prefetch/prefetcher.hh" + +#include "sim/sim_events.hh" // for SimExitEvent + +using namespace std; + +template<class TagStore, class Buffering, class Coherence> +bool +Cache<TagStore,Buffering,Coherence>:: +doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) +{ + if (isCpuSide) + { + access(pkt); + } + else + { + if (pkt->isResponse()) + handleResponse(pkt); + else + snoop(pkt); + } + return true; //Deal with blocking.... +} + +template<class TagStore, class Buffering, class Coherence> +Tick +Cache<TagStore,Buffering,Coherence>:: +doAtomicAccess(Packet *pkt, bool isCpuSide) +{ + if (isCpuSide) + { + probe(pkt, true); + //TEMP ALWAYS SUCCES FOR NOW + pkt->result = Packet::Success; + } + else + { + if (pkt->isResponse()) + handleResponse(pkt); + else + snoopProbe(pkt, true); + } + //Fix this timing info + return hitLatency; +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>:: +doFunctionalAccess(Packet *pkt, bool isCpuSide) +{ + if (isCpuSide) + { + //TEMP USE CPU?THREAD 0 0 + pkt->req->setThreadContext(0,0); + probe(pkt, true); + //TEMP ALWAYS SUCCESFUL FOR NOW + pkt->result = Packet::Success; + } + else + { + if (pkt->isResponse()) + handleResponse(pkt); + else + snoopProbe(pkt, true); + } +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>:: +recvStatusChange(Port::Status status, bool isCpuSide) +{ + +} + + +template<class TagStore, class Buffering, class Coherence> +Cache<TagStore,Buffering,Coherence>:: +Cache(const std::string &_name, + Cache<TagStore,Buffering,Coherence>::Params ¶ms) + : BaseCache(_name, params.baseParams), + prefetchAccess(params.prefetchAccess), + tags(params.tags), missQueue(params.missQueue), + coherence(params.coherence), prefetcher(params.prefetcher), + doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) +{ +//FIX BUS POINTERS +// if (params.in == NULL) { + topLevelCache = true; +// } +//PLEASE FIX THIS, BUS SIZES NOT BEING USED + tags->setCache(this, blkSize, 1/*params.out->width, params.out->clockRate*/); + tags->setPrefetcher(prefetcher); + missQueue->setCache(this); + missQueue->setPrefetcher(prefetcher); + coherence->setCache(this); + prefetcher->setCache(this); + prefetcher->setTags(tags); + prefetcher->setBuffer(missQueue); +#if 0 + invalidatePkt = new Packet; + invalidatePkt->cmd = Packet::InvalidateReq; +#endif +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::regStats() +{ + BaseCache::regStats(); + tags->regStats(name()); + missQueue->regStats(name()); + coherence->regStats(name()); + prefetcher->regStats(name()); +} + +template<class TagStore, class Buffering, class Coherence> +bool +Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) +{ +//@todo Add back in MemDebug Calls +// MemDebug::cacheAccess(pkt); + BlkType *blk = NULL; + PacketList writebacks; + int size = blkSize; + int lat = hitLatency; + if (prefetchAccess) { + //We are determining prefetches on access stream, call prefetcher + prefetcher->handleMiss(pkt, curTick); + } + if (!pkt->req->isUncacheable()) { + if (pkt->isInvalidate() && !pkt->isRead() + && !pkt->isWrite()) { + //Upgrade or Invalidate + //Look into what happens if two slave caches on bus + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)blkSize - 1)); + + //@todo Should this return latency have the hit latency in it? +// respond(pkt,curTick+lat); + pkt->flags |= SATISFIED; +// return MA_HIT; //@todo, return values + return true; + } + blk = tags->handleAccess(pkt, lat, writebacks); + } else { + size = pkt->getSize(); + } + // If this is a block size write/hint (WH64) allocate the block here + // if the coherence protocol allows it. + /** @todo make the fast write alloc (wh64) work with coherence. */ + /** @todo Do we want to do fast writes for writebacks as well? */ + if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && + (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) { + // not outstanding misses, can do this + MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr(), pkt->req->getAsid()); + if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) { + if (outstanding_miss) { + warn("WriteInv doing a fastallocate" + "with an outstanding miss to the same address\n"); + } + blk = tags->handleFill(NULL, pkt, BlkValid | BlkWritable, + writebacks); + ++fastWrites; + } + } + while (!writebacks.empty()) { + missQueue->doWriteback(writebacks.front()); + writebacks.pop_front(); + } + DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmdString(), + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC()); + if (blk) { + // Hit + hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + // clear dirty bit if write through + if (pkt->needsResponse()) + respond(pkt, curTick+lat); +// return MA_HIT; + return true; + } + + // Miss + if (!pkt->req->isUncacheable()) { + misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + /** @todo Move miss count code into BaseCache */ + if (missCount) { + --missCount; + if (missCount == 0) + new SimLoopExitEvent(curTick, "A cache reached the maximum miss count"); + } + } + missQueue->handleMiss(pkt, size, curTick + hitLatency); +// return MA_CACHE_MISS; + return true; +} + + +template<class TagStore, class Buffering, class Coherence> +Packet * +Cache<TagStore,Buffering,Coherence>::getPacket() +{ + Packet * pkt = missQueue->getPacket(); + if (pkt) { + if (!pkt->req->isUncacheable()) { + if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][pkt->req->getThreadNum()]++; + BlkType *blk = tags->findBlock(pkt); + Packet::Command cmd = coherence->getBusCmd(pkt->cmd, + (blk)? blk->status : 0); + missQueue->setBusCmd(pkt, cmd); + } + } + + assert(!doMasterRequest() || missQueue->havePending()); + assert(!pkt || pkt->time <= curTick); + return pkt; +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success) +{ + if (success) { + missQueue->markInService(pkt); + //Temp Hack for UPGRADES + if (pkt->cmd == Packet::UpgradeReq) { + handleResponse(pkt); + } + } else if (pkt && !pkt->req->isUncacheable()) { + missQueue->restoreOrigCmd(pkt); + } +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) +{ + BlkType *blk = NULL; + if (pkt->senderState) { +// MemDebug::cacheResponse(pkt); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), + pkt->getAddr() & (((ULL(1))<<48)-1)); + + if (pkt->isCacheFill() && !pkt->isNoAllocate()) { + blk = tags->findBlock(pkt); + CacheBlk::State old_state = (blk) ? blk->status : 0; + PacketList writebacks; + blk = tags->handleFill(blk, (MSHR*)pkt->senderState, + coherence->getNewState(pkt,old_state), + writebacks); + while (!writebacks.empty()) { + missQueue->doWriteback(writebacks.front()); + } + } + missQueue->handleResponse(pkt, curTick + hitLatency); + } +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::pseudoFill(Addr addr, int asid) +{ + // Need to temporarily move this blk into MSHRs + MSHR *mshr = missQueue->allocateTargetList(addr, asid); + int lat; + PacketList dummy; + // Read the data into the mshr + BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); + assert(dummy.empty()); + assert(mshr->pkt->flags & SATISFIED); + // can overload order since it isn't used on non pending blocks + mshr->order = blk->status; + // temporarily remove the block from the cache. + tags->invalidateBlk(addr, asid); +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::pseudoFill(MSHR *mshr) +{ + // Need to temporarily move this blk into MSHRs + assert(mshr->pkt->cmd == Packet::ReadReq); + int lat; + PacketList dummy; + // Read the data into the mshr + BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); + assert(dummy.empty()); + assert(mshr->pkt->flags & SATISFIED); + // can overload order since it isn't used on non pending blocks + mshr->order = blk->status; + // temporarily remove the block from the cache. + tags->invalidateBlk(mshr->pkt->getAddr(), mshr->pkt->req->getAsid()); +} + + +template<class TagStore, class Buffering, class Coherence> +Packet * +Cache<TagStore,Buffering,Coherence>::getCoherencePacket() +{ + return coherence->getPacket(); +} + + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) +{ + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); + if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req + //If we find an mshr, and it is in service, we need to NACK or invalidate + if (mshr) { + if (mshr->inService) { + if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill()) + && (pkt->cmd != Packet::InvalidateReq && pkt->cmd != Packet::WriteInvalidateReq)) { + //If the outstanding request was an invalidate (upgrade,readex,..) + //Then we need to ACK the request until we get the data + //Also NACK if the outstanding request is not a cachefill (writeback) + pkt->flags |= NACKED_LINE; + return; + } + else { + //The supplier will be someone else, because we are waiting for + //the data. This should cause this cache to be forced to go to + //the shared state, not the exclusive even though the shared line + //won't be asserted. But for now we will just invlidate ourselves + //and allow the other cache to go into the exclusive state. + //@todo Make it so a read to a pending read doesn't invalidate. + //@todo Make it so that a read to a pending read can't be exclusive now. + + //Set the address so find match works + invalidatePkt->addrOverride(pkt->getAddr()); + + //Append the invalidate on + missQueue->addTarget(mshr,invalidatePkt); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1)); + return; + } + } + } + //We also need to check the writeback buffers and handle those + std::vector<MSHR *> writebacks; + if (missQueue->findWrites(blk_addr, pkt->req->getAsid(), writebacks)) { + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1)); + + //Look through writebacks for any non-uncachable writes, use that + for (int i=0; i<writebacks.size(); i++) { + mshr = writebacks[i]; + + if (!mshr->pkt->req->isUncacheable()) { + if (pkt->isRead()) { + //Only Upgrades don't get here + //Supply the data + pkt->flags |= SATISFIED; + + //If we are in an exclusive protocol, make it ask again + //to get write permissions (upgrade), signal shared + pkt->flags |= SHARED_LINE; + + assert(pkt->isRead()); + Addr offset = pkt->getAddr() & ~(blkSize - 1); + assert(offset < blkSize); + assert(pkt->getSize() <= blkSize); + assert(offset + pkt->getSize() <=blkSize); + memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize()); + + respondToSnoop(pkt); + } + + if (pkt->isInvalidate()) { + //This must be an upgrade or other cache will take ownership + missQueue->markInService(mshr->pkt); + } + return; + } + } + } + } + CacheBlk::State new_state; + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); + if (satisfy) { + tags->handleSnoop(blk, new_state, pkt); + respondToSnoop(pkt); + return; + } + tags->handleSnoop(blk, new_state); +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::snoopResponse(Packet * &pkt) +{ + //Need to handle the response, if NACKED + if (pkt->flags & NACKED_LINE) { + //Need to mark it as not in service, and retry for bus + assert(0); //Yeah, we saw a NACK come through + + //For now this should never get called, we return false when we see a NACK + //instead, by doing this we allow the bus_blocked mechanism to handle the retry + //For now it retrys in just 2 cycles, need to figure out how to change that + //Eventually we will want to also have success come in as a parameter + //Need to make sure that we handle the functionality that happens on successufl + //return of the sendAddr function + } +} + +template<class TagStore, class Buffering, class Coherence> +void +Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr, int asid) +{ + tags->invalidateBlk(addr,asid); +} + + +/** + * @todo Fix to not assume write allocate + */ +template<class TagStore, class Buffering, class Coherence> +Tick +Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update) +{ +// MemDebug::cacheProbe(pkt); + if (!pkt->req->isUncacheable()) { + if (pkt->isInvalidate() && !pkt->isRead() + && !pkt->isWrite()) { + //Upgrade or Invalidate, satisfy it, don't forward + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)blkSize - 1)); + pkt->flags |= SATISFIED; + return 0; + } + } + + PacketList writebacks; + int lat; + BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); + + if (!blk) { + // Need to check for outstanding misses and writes + Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); + + // There can only be one matching outstanding miss. + MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); + + // There can be many matching outstanding writes. + vector<MSHR*> writes; + missQueue->findWrites(blk_addr, pkt->req->getAsid(), writes); + + if (!update) { + memSidePort->sendFunctional(pkt); + // Check for data in MSHR and writebuffer. + if (mshr) { + warn("Found outstanding miss on an non-update probe"); + MSHR::TargetList *targets = mshr->getTargetList(); + MSHR::TargetList::iterator i = targets->begin(); + MSHR::TargetList::iterator end = targets->end(); + for (; i != end; ++i) { + Packet * target = *i; + // If the target contains data, and it overlaps the + // probed request, need to update data + if (target->isWrite() && target->intersect(pkt)) { + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (target->getAddr() < pkt->getAddr()) { + int offset = pkt->getAddr() - target->getAddr(); + pkt_data = pkt->getPtr<uint8_t>(); + write_data = target->getPtr<uint8_t>() + offset; + data_size = target->getSize() - offset; + assert(data_size > 0); + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); + } else { + int offset = target->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr<uint8_t>() + offset; + write_data = target->getPtr<uint8_t>(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > target->getSize()) + data_size = target->getSize(); + } + + if (pkt->isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + } + } + } + for (int i = 0; i < writes.size(); ++i) { + Packet * write = writes[i]->pkt; + if (write->intersect(pkt)) { + warn("Found outstanding write on an non-update probe"); + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (write->getAddr() < pkt->getAddr()) { + int offset = pkt->getAddr() - write->getAddr(); + pkt_data = pkt->getPtr<uint8_t>(); + write_data = write->getPtr<uint8_t>() + offset; + data_size = write->getSize() - offset; + assert(data_size > 0); + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); + } else { + int offset = write->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr<uint8_t>() + offset; + write_data = write->getPtr<uint8_t>(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > write->getSize()) + data_size = write->getSize(); + } + + if (pkt->isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + + } + } + return 0; + } else { + // update the cache state and statistics + if (mshr || !writes.empty()){ + // Can't handle it, return pktuest unsatisfied. + return 0; + } + if (!pkt->req->isUncacheable()) { + // Fetch the cache block to fill + BlkType *blk = tags->findBlock(pkt); + Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd, + (blk)? blk->status : 0); + + Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize); + + busPkt->allocate(); + + busPkt->time = curTick; + + lat = memSidePort->sendAtomic(busPkt); + +/* if (!(busPkt->flags & SATISFIED)) { + // blocked at a higher level, just return + return 0; + } + +*/ misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + + CacheBlk::State old_state = (blk) ? blk->status : 0; + tags->handleFill(blk, busPkt, + coherence->getNewState(busPkt, old_state), + writebacks, pkt); + // Handle writebacks if needed + while (!writebacks.empty()){ + memSidePort->sendAtomic(writebacks.front()); + writebacks.pop_front(); + } + return lat + hitLatency; + } else { + return memSidePort->sendAtomic(pkt); + } + } + } else { + // There was a cache hit. + // Handle writebacks if needed + while (!writebacks.empty()){ + memSidePort->sendAtomic(writebacks.front()); + writebacks.pop_front(); + } + + if (update) { + hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + } else if (pkt->isWrite()) { + // Still need to change data in all locations. + return memSidePort->sendAtomic(pkt); + } + return curTick + lat; + } + fatal("Probe not handled.\n"); + return 0; +} + +template<class TagStore, class Buffering, class Coherence> +Tick +Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt, bool update) +{ + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); + CacheBlk::State new_state = 0; + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); + if (satisfy) { + tags->handleSnoop(blk, new_state, pkt); + return hitLatency; + } + tags->handleSnoop(blk, new_state); + return 0; +} + diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc new file mode 100644 index 000000000..bcf3ce9c5 --- /dev/null +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -0,0 +1,567 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Ron Dreslinski + */ + +/** + * @file + * Definitions of CoherenceProtocol. + */ + +#include <string> + +#include "base/misc.hh" +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/cache.hh" +#include "mem/cache/coherence/coherence_protocol.hh" +#include "sim/builder.hh" + +using namespace std; + + +CoherenceProtocol::StateTransition::StateTransition() + : busCmd(Packet::InvalidCmd), newState(-1), snoopFunc(invalidTransition) +{ +} + + +void +CoherenceProtocol::regStats() +{ + // Even though we count all the possible transitions in the + // requestCount and snoopCount arrays, most of these are invalid, + // so we just select the interesting ones to print here. + + requestCount[Invalid][Packet::ReadReq] + .name(name() + ".read_invalid") + .desc("read misses to invalid blocks") + ; + + requestCount[Invalid][Packet::WriteReq] + .name(name() +".write_invalid") + .desc("write misses to invalid blocks") + ; + + requestCount[Invalid][Packet::SoftPFReq] + .name(name() +".swpf_invalid") + .desc("soft prefetch misses to invalid blocks") + ; + + requestCount[Invalid][Packet::HardPFReq] + .name(name() +".hwpf_invalid") + .desc("hard prefetch misses to invalid blocks") + ; + + requestCount[Shared][Packet::WriteReq] + .name(name() + ".write_shared") + .desc("write misses to shared blocks") + ; + + requestCount[Owned][Packet::WriteReq] + .name(name() + ".write_owned") + .desc("write misses to owned blocks") + ; + + snoopCount[Shared][Packet::ReadReq] + .name(name() + ".snoop_read_shared") + .desc("read snoops on shared blocks") + ; + + snoopCount[Shared][Packet::ReadExReq] + .name(name() + ".snoop_readex_shared") + .desc("readEx snoops on shared blocks") + ; + + snoopCount[Shared][Packet::UpgradeReq] + .name(name() + ".snoop_upgrade_shared") + .desc("upgradee snoops on shared blocks") + ; + + snoopCount[Modified][Packet::ReadReq] + .name(name() + ".snoop_read_modified") + .desc("read snoops on modified blocks") + ; + + snoopCount[Modified][Packet::ReadExReq] + .name(name() + ".snoop_readex_modified") + .desc("readEx snoops on modified blocks") + ; + + snoopCount[Owned][Packet::ReadReq] + .name(name() + ".snoop_read_owned") + .desc("read snoops on owned blocks") + ; + + snoopCount[Owned][Packet::ReadExReq] + .name(name() + ".snoop_readex_owned") + .desc("readEx snoops on owned blocks") + ; + + snoopCount[Owned][Packet::UpgradeReq] + .name(name() + ".snoop_upgrade_owned") + .desc("upgrade snoops on owned blocks") + ; + + snoopCount[Exclusive][Packet::ReadReq] + .name(name() + ".snoop_read_exclusive") + .desc("read snoops on exclusive blocks") + ; + + snoopCount[Exclusive][Packet::ReadExReq] + .name(name() + ".snoop_readex_exclusive") + .desc("readEx snoops on exclusive blocks") + ; + + snoopCount[Shared][Packet::InvalidateReq] + .name(name() + ".snoop_inv_shared") + .desc("Invalidate snoops on shared blocks") + ; + + snoopCount[Owned][Packet::InvalidateReq] + .name(name() + ".snoop_inv_owned") + .desc("Invalidate snoops on owned blocks") + ; + + snoopCount[Exclusive][Packet::InvalidateReq] + .name(name() + ".snoop_inv_exclusive") + .desc("Invalidate snoops on exclusive blocks") + ; + + snoopCount[Modified][Packet::InvalidateReq] + .name(name() + ".snoop_inv_modified") + .desc("Invalidate snoops on modified blocks") + ; + + snoopCount[Invalid][Packet::InvalidateReq] + .name(name() + ".snoop_inv_invalid") + .desc("Invalidate snoops on invalid blocks") + ; + + snoopCount[Shared][Packet::WriteInvalidateReq] + .name(name() + ".snoop_writeinv_shared") + .desc("WriteInvalidate snoops on shared blocks") + ; + + snoopCount[Owned][Packet::WriteInvalidateReq] + .name(name() + ".snoop_writeinv_owned") + .desc("WriteInvalidate snoops on owned blocks") + ; + + snoopCount[Exclusive][Packet::WriteInvalidateReq] + .name(name() + ".snoop_writeinv_exclusive") + .desc("WriteInvalidate snoops on exclusive blocks") + ; + + snoopCount[Modified][Packet::WriteInvalidateReq] + .name(name() + ".snoop_writeinv_modified") + .desc("WriteInvalidate snoops on modified blocks") + ; + + snoopCount[Invalid][Packet::WriteInvalidateReq] + .name(name() + ".snoop_writeinv_invalid") + .desc("WriteInvalidate snoops on invalid blocks") + ; +} + + +bool +CoherenceProtocol::invalidateTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + // invalidate the block + new_state = (blk->status & ~stateMask) | Invalid; + return false; +} + + +bool +CoherenceProtocol::supplyTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state + ) +{ + return true; +} + + +bool +CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Shared; + pkt->flags |= SHARED_LINE; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + + +bool +CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Owned; + pkt->flags |= SHARED_LINE; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + + +bool +CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Invalid; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + +bool +CoherenceProtocol::assertShared(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Shared; + pkt->flags |= SHARED_LINE; + return false; +} + +CoherenceProtocol::CoherenceProtocol(const string &name, + const string &protocol, + const bool doUpgrades) + : SimObject(name) +{ + if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) { + cerr << "CoherenceProtocol: ownership protocols require upgrade transactions" + << "(write miss on owned block generates ReadExcl, which will clobber dirty block)" + << endl; + fatal(""); + } + + Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; + Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp; + +//@todo add in hardware prefetch to this list + if (protocol == "msi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); + + // on response to given request: specify new state + transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + + if (doUpgrades) { + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + } + } + + else if(protocol == "mesi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); + + // on response to given request: specify new state + transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); + //It will move into shared if the shared line is asserted in the + //getNewState function + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + + if (doUpgrades) { + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + } + } + + else if(protocol == "mosi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); + + // on response to given request: specify new state + transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + transitionTable[Owned][writeToSharedResp].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + } + + else if(protocol == "moesi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); + + // on response to given request: specify new state + transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); + //It will move into shared if the shared line is asserted in the + //getNewState function + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + transitionTable[Owned][writeToSharedResp].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); + //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + } + + else { + cerr << "CoherenceProtocol: unrecognized protocol " << protocol + << endl; + fatal(""); + } +} + + +Packet::Command +CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, + MSHR *mshr) +{ + state &= stateMask; + int cmd_idx = (int) cmdIn; + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + + Packet::Command cmdOut = transitionTable[state][cmd_idx].busCmd; + + assert(cmdOut != Packet::InvalidCmd); + + ++requestCount[state][cmd_idx]; + + return cmdOut; +} + + +CacheBlk::State +CoherenceProtocol::getNewState(Packet * &pkt, CacheBlk::State oldState) +{ + CacheBlk::State state = oldState & stateMask; + int cmd_idx = pkt->cmdToIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + + CacheBlk::State newState = transitionTable[state][cmd_idx].newState; + + //Check if it's exclusive and the shared line was asserted, + //then goto shared instead + if (newState == Exclusive && (pkt->flags & SHARED_LINE)) { + newState = Shared; + } + + assert(newState != -1); + + //Make sure not to loose any other state information + newState = (oldState & ~stateMask) | newState; + return newState; +} + + +bool +CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + if (blk == NULL) { + // nothing to do if we don't have a block + return false; + } + + CacheBlk::State state = blk->status & stateMask; + int cmd_idx = pkt->cmdToIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + +// assert(mshr == NULL); // can't currently handle outstanding requests + //Check first if MSHR, and also insure, if there is one, that it is not in service + assert(!mshr || mshr->inService == 0); + ++snoopCount[state][cmd_idx]; + + bool ret = transitionTable[state][cmd_idx].snoopFunc(cache, pkt, blk, mshr, + new_state); + + + + return ret; +} + +bool +CoherenceProtocol::nullTransition(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + // do nothing + if (blk) + new_state = blk->status; + return false; +} + + +bool +CoherenceProtocol::invalidTransition(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + panic("Invalid transition"); + return false; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) + + Param<string> protocol; + Param<bool> do_upgrades; + +END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) + + INIT_PARAM(protocol, "name of coherence protocol"), + INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true) + +END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) + + +CREATE_SIM_OBJECT(CoherenceProtocol) +{ + return new CoherenceProtocol(getInstanceName(), protocol, + do_upgrades); +} + +REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol) + +#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh new file mode 100644 index 000000000..21351ace4 --- /dev/null +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * Declaration of CoherenceProcotol a basic coherence policy. + */ +#ifndef __COHERENCE_PROTOCOL_HH__ +#define __COHERENCE_PROTOCOL_HH__ + +#include <string> + +#include "sim/sim_object.hh" +#include "mem/packet.hh" +#include "mem/cache/cache_blk.hh" +#include "base/statistics.hh" + +class BaseCache; +class MSHR; + +/** + * A simple coherence policy for the memory hierarchy. Currently implements + * MSI, MESI, and MOESI protocols. + */ +class CoherenceProtocol : public SimObject +{ + public: + /** + * Contruct and initialize this policy. + * @param name The name of this policy. + * @param protocol The string representation of the protocol to use. + * @param doUpgrades True if bus upgrades should be used. + */ + CoherenceProtocol(const std::string &name, const std::string &protocol, + const bool doUpgrades); + + /** + * Destructor. + */ + virtual ~CoherenceProtocol() {}; + + /** + * Register statistics + */ + virtual void regStats(); + + /** + * Get the proper bus command for the given command and status. + * @param cmd The request's command. + * @param status The current state of the cache block. + * @param mshr The MSHR matching the request. + * @return The proper bus command, as determined by the protocol. + */ + Packet::Command getBusCmd(Packet::Command cmd, CacheBlk::State status, + MSHR *mshr = NULL); + + /** + * Return the proper state given the current state and the bus response. + * @param req The bus response. + * @param oldState The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, + CacheBlk::State oldState); + + /** + * Handle snooped bus requests. + * @param cache The cache that snooped the request. + * @param req The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state The new coherence state of the block. + * @return True if the request should be satisfied locally. + */ + bool handleBusRequest(BaseCache *cache, Packet * &pkt, CacheBlk *blk, + MSHR *mshr, CacheBlk::State &new_state); + + protected: + /** Snoop function type. */ + typedef bool (*SnoopFuncType)(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + // + // Standard snoop transition functions + // + + /** + * Do nothing transition. + */ + static bool nullTransition(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalid transition, basically panic. + */ + static bool invalidTransition(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalidate block, move to Invalid state. + */ + static bool invalidateTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data, no state transition. + */ + static bool supplyTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data and go to Shared state. + */ + static bool supplyAndGotoSharedTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data and go to Owned state. + */ + static bool supplyAndGotoOwnedTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalidate block, supply data, and go to Invalid state. + */ + static bool supplyAndInvalidateTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Assert the shared line for a block that is shared/exclusive. + */ + static bool assertShared(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Definition of protocol state transitions. + */ + class StateTransition + { + friend class CoherenceProtocol; + + /** The bus command of this transition. */ + Packet::Command busCmd; + /** The state to transition to. */ + int newState; + /** The snoop function for this transition. */ + SnoopFuncType snoopFunc; + + /** + * Constructor, defaults to invalid transition. + */ + StateTransition(); + + /** + * Initialize bus command. + * @param cmd The bus command to use. + */ + void onRequest(Packet::Command cmd) + { + busCmd = cmd; + } + + /** + * Set the transition state. + * @param s The new state. + */ + void onResponse(CacheBlk::State s) + { + newState = s; + } + + /** + * Initialize the snoop function. + * @param f The new snoop function. + */ + void onSnoop(SnoopFuncType f) + { + snoopFunc = f; + } + }; + + friend class CoherenceProtocol::StateTransition; + + /** Mask to select status bits relevant to coherence protocol. */ + const static CacheBlk::State + stateMask = BlkValid | BlkWritable | BlkDirty; + + /** The Modified (M) state. */ + const static CacheBlk::State + Modified = BlkValid | BlkWritable | BlkDirty; + /** The Owned (O) state. */ + const static CacheBlk::State + Owned = BlkValid | BlkDirty; + /** The Exclusive (E) state. */ + const static CacheBlk::State + Exclusive = BlkValid | BlkWritable; + /** The Shared (S) state. */ + const static CacheBlk::State + Shared = BlkValid; + /** The Invalid (I) state. */ + const static CacheBlk::State + Invalid = 0; + + /** + * Maximum state encoding value (used to size transition lookup + * table). Could be more than number of states, depends on + * encoding of status bits. + */ + const static int stateMax = stateMask; + + /** + * The table of all possible transitions, organized by starting state and + * request command. + */ + StateTransition transitionTable[stateMax+1][NUM_MEM_CMDS]; + + /** + * @addtogroup CoherenceStatistics + * @{ + */ + /** + * State accesses from parent cache. + */ + Stats::Scalar<> requestCount[stateMax+1][NUM_MEM_CMDS]; + /** + * State accesses from snooped requests. + */ + Stats::Scalar<> snoopCount[stateMax+1][NUM_MEM_CMDS]; + /** + * @} + */ +}; + +#endif // __COHERENCE_PROTOCOL_HH__ diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh new file mode 100644 index 000000000..ca9d18beb --- /dev/null +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Declaration of a simple coherence policy. + */ + +#ifndef __SIMPLE_COHERENCE_HH__ +#define __SIMPLE_COHERENCE_HH__ + +#include <string> + +#include "mem/packet.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "mem/cache/coherence/coherence_protocol.hh" + +class BaseCache; + +/** + * A simple MP coherence policy. This policy assumes an atomic bus and only one + * level of cache. + */ +class SimpleCoherence +{ + protected: + /** Pointer to the parent cache. */ + BaseCache *cache; + /** Pointer to the coherence protocol. */ + CoherenceProtocol *protocol; + + public: + /** + * Construct and initialize this coherence policy. + * @param _protocol The coherence protocol to use. + */ + SimpleCoherence(CoherenceProtocol *_protocol) + : protocol(_protocol) + { + } + + /** + * Set the pointer to the parent cache. + * @param _cache The parent cache. + */ + void setCache(BaseCache *_cache) + { + cache = _cache; + } + + /** + * Register statistics. + * @param name The name to prepend to stat descriptions. + */ + void regStats(const std::string &name) + { + } + + /** + * This policy does not forward invalidates, return NULL. + * @return NULL. + */ + Packet * getPacket() + { + return NULL; + } + + /** + * Return the proper state given the current state and the bus response. + * @param req The bus response. + * @param current The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current) + { + return protocol->getNewState(pkt, current); + } + + /** + * Handle snooped bus requests. + * @param req The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state Return the new state for the block. + */ + bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state) + { +// assert(mshr == NULL); +//Got rid of, there could be an MSHR, but it can't be in service + if (blk != NULL) + { + if (pkt->cmd != Packet::Writeback) { + return protocol->handleBusRequest(cache, pkt, blk, mshr, + new_state); + } + else { //It is a writeback, must be ownership protocol, just keep state + new_state = blk->status; + } + } + return false; + } + + /** + * Get the proper bus command for the given command and status. + * @param cmd The request's command. + * @param state The current state of the cache block. + * @return The proper bus command, as determined by the protocol. + */ + Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) + { + if (cmd == Packet::Writeback) return Packet::Writeback; + return protocol->getBusCmd(cmd, state); + } + + /** + * Return true if this coherence policy can handle fast cache writes. + */ + bool allowFastWrites() { return false; } + + bool hasProtocol() { return true; } +}; + +#endif //__SIMPLE_COHERENCE_HH__ + + + + + + + + diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc new file mode 100644 index 000000000..5ab706269 --- /dev/null +++ b/src/mem/cache/coherence/uni_coherence.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/base_cache.hh" + +#include "base/trace.hh" + +using namespace std; + +UniCoherence::UniCoherence() + : cshrs(50) +{ +} + +Packet * +UniCoherence::getPacket() +{ + bool unblock = cshrs.isFull(); + Packet* pkt = cshrs.getReq(); + cshrs.markInService((MSHR*)pkt->senderState); + if (!cshrs.havePending()) { + cache->clearSlaveRequest(Request_Coherence); + } + if (unblock) { + //since CSHRs are always used as buffers, should always get rid of one + assert(!cshrs.isFull()); + cache->clearBlocked(Blocked_Coherence); + } + return pkt; +} + +/** + * @todo add support for returning slave requests, not doing them here. + */ +bool +UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state) +{ + new_state = 0; + if (pkt->isInvalidate()) { + DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", + pkt->getAddr(), blk); + if (!cache->isTopLevel()) { + // Forward to other caches + Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1); + cshrs.allocate(tmp); + cache->setSlaveRequest(Request_Coherence, curTick); + if (cshrs.isFull()) { + cache->setBlockedForSnoop(Blocked_Coherence); + } + } + } else { + if (blk) { + new_state = blk->status; + } + } + return false; +} diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh new file mode 100644 index 000000000..764bf6276 --- /dev/null +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +#ifndef __UNI_COHERENCE_HH__ +#define __UNI_COHERENCE_HH__ + +#include "base/trace.hh" +#include "base/misc.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "mem/packet.hh" + +class BaseCache; + +class UniCoherence +{ + protected: + /** Buffers to hold forwarded invalidates. */ + MSHRQueue cshrs; + /** Pointer to the parent cache. */ + BaseCache *cache; + + public: + /** + * Construct and initialize this coherence policy. + */ + UniCoherence(); + + /** + * Set the pointer to the parent cache. + * @param _cache The parent cache. + */ + void setCache(BaseCache *_cache) + { + cache = _cache; + } + + /** + * Register statistics. + * @param name The name to prepend to stat descriptions. + */ + void regStats(const std::string &name) + { + } + + /** + * Return Read. + * @param cmd The request's command. + * @param state The current state of the cache block. + * @return The proper bus command, as determined by the protocol. + * @todo Make changes so writebacks don't get here. + */ + Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) + { + if (cmd == Packet::HardPFReq && state) + warn("Trying to issue a prefetch to a block we already have\n"); + if (cmd == Packet::Writeback) + return Packet::Writeback; + return Packet::ReadReq; + } + + /** + * Just return readable and writeable. + * @param req The bus response. + * @param current The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current) + { + if (pkt->senderState) //Blocking Buffers don't get mshrs + { + if (((MSHR *)(pkt->senderState))->originalCmd == Packet::HardPFReq) { + DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n"); + return BlkHWPrefetched | BlkValid | BlkWritable; + } + else { + return BlkValid | BlkWritable; + } + } + //@todo What about prefetching with blocking buffers + else + return BlkValid | BlkWritable; + } + /** + * Return outstanding invalidate to forward. + * @return The next invalidate to forward to lower levels of cache. + */ + Packet * getPacket(); + + /** + * Handle snooped bus requests. + * @param req The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state The new coherence state of the block. + * @return True if the request should be satisfied locally. + */ + bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state); + + /** + * Return true if this coherence policy can handle fast cache writes. + */ + bool allowFastWrites() { return true; } + + bool hasProtocol() { return false; } +}; + +#endif //__UNI_COHERENCE_HH__ diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc new file mode 100644 index 000000000..10d53b109 --- /dev/null +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of a simple buffer for a blocking cache. + */ + +#include "cpu/smt.hh" //for maxThreadsPerCPU +#include "mem/cache/base_cache.hh" +#include "mem/cache/miss/blocking_buffer.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" +#include "sim/eventq.hh" // for Event declaration. +#include "mem/request.hh" + +using namespace TheISA; + +/** + * @todo Move writebacks into shared BaseBuffer class. + */ +void +BlockingBuffer::regStats(const std::string &name) +{ + using namespace Stats; + writebacks + .init(maxThreadsPerCPU) + .name(name + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; +} + +void +BlockingBuffer::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher) +{ + prefetcher = _prefetcher; +} +void +BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) +{ + Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1); + if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || + !pkt->needsResponse())) { + if (!pkt->needsResponse()) { + wb.allocateAsBuffer(pkt); + } else { + wb.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt); + } + + memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), blk_size); + + cache->setBlocked(Blocked_NoWBBuffers); + cache->setMasterRequest(Request_WB, time); + return; + } + + if (!pkt->needsResponse()) { + miss.allocateAsBuffer(pkt); + } else { + miss.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt); + } + if (!pkt->req->isUncacheable()) { + miss.pkt->flags |= CACHE_LINE_FILL; + } + cache->setBlocked(Blocked_NoMSHRs); + cache->setMasterRequest(Request_MSHR, time); +} + +Packet * +BlockingBuffer::getPacket() +{ + if (miss.pkt && !miss.inService) { + return miss.pkt; + } + return wb.pkt; +} + +void +BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd) +{ + MSHR *mshr = (MSHR*) pkt->senderState; + mshr->originalCmd = pkt->cmd; + if (pkt->isCacheFill()) + pkt->cmdOverride(cmd); +} + +void +BlockingBuffer::restoreOrigCmd(Packet * &pkt) +{ + pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd); +} + +void +BlockingBuffer::markInService(Packet * &pkt) +{ + if (!pkt->isCacheFill() && pkt->isWrite()) { + // Forwarding a write/ writeback, don't need to change + // the command + assert((MSHR*)pkt->senderState == &wb); + cache->clearMasterRequest(Request_WB); + if (!pkt->needsResponse()) { + assert(wb.getNumTargets() == 0); + wb.deallocate(); + cache->clearBlocked(Blocked_NoWBBuffers); + } else { + wb.inService = true; + } + } else { + assert((MSHR*)pkt->senderState == &miss); + cache->clearMasterRequest(Request_MSHR); + if (!pkt->needsResponse()) { + assert(miss.getNumTargets() == 0); + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } else { + //mark in service + miss.inService = true; + } + } +} + +void +BlockingBuffer::handleResponse(Packet * &pkt, Tick time) +{ + if (pkt->isCacheFill()) { + // targets were handled in the cache tags + assert((MSHR*)pkt->senderState == &miss); + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } else { + if (((MSHR*)(pkt->senderState))->hasTargets()) { + // Should only have 1 target if we had any + assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1); + Packet * target = ((MSHR*)(pkt->senderState))->getTarget(); + ((MSHR*)(pkt->senderState))->popTarget(); + if (pkt->isRead()) { + memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), target->getSize()); + } + cache->respond(target, time); + assert(!((MSHR*)(pkt->senderState))->hasTargets()); + } + + if (pkt->isWrite()) { + assert(((MSHR*)(pkt->senderState)) == &wb); + wb.deallocate(); + cache->clearBlocked(Blocked_NoWBBuffers); + } else { + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } + } +} + +void +BlockingBuffer::squash(int threadNum) +{ + if (miss.threadNum == threadNum) { + Packet * target = miss.getTarget(); + miss.popTarget(); + assert(target->req->getThreadNum() == threadNum); + target = NULL; + assert(!miss.hasTargets()); + miss.ntargets=0; + if (!miss.inService) { + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + cache->clearMasterRequest(Request_MSHR); + } + } +} + +void +BlockingBuffer::doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed) +{ + // Generate request + Request * req = new Request(addr, size, 0); + Packet * pkt = new Packet(req, Packet::Writeback, -1); + pkt->allocate(); + if (data) { + memcpy(pkt->getPtr<uint8_t>(), data, size); + } + + if (compressed) { + pkt->flags |= COMPRESSED; + } + + ///All writebacks charged to same thread @todo figure this out + writebacks[pkt->req->getThreadNum()]++; + + wb.allocateAsBuffer(pkt); + cache->setMasterRequest(Request_WB, curTick); + cache->setBlocked(Blocked_NoWBBuffers); +} + + + +void +BlockingBuffer::doWriteback(Packet * &pkt) +{ + writebacks[pkt->req->getThreadNum()]++; + + wb.allocateAsBuffer(pkt); + + // Since allocate as buffer copies the request, + // need to copy data here. + memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize()); + + cache->setBlocked(Blocked_NoWBBuffers); + cache->setMasterRequest(Request_WB, curTick); +} diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh new file mode 100644 index 000000000..39a06a377 --- /dev/null +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a simple buffer for a blocking cache. + */ + +#ifndef __BLOCKING_BUFFER_HH__ +#define __BLOCKING_BUFFER_HH__ + +#include <vector> + +#include "mem/cache/miss/mshr.hh" +#include "base/statistics.hh" + +class BaseCache; +class BasePrefetcher; + +/** + * Miss and writeback storage for a blocking cache. + */ +class BlockingBuffer +{ +protected: + /** Miss storage. */ + MSHR miss; + /** WB storage. */ + MSHR wb; + + //Params + + /** Allocate on write misses. */ + const bool writeAllocate; + + /** Pointer to the parent cache. */ + BaseCache* cache; + + BasePrefetcher* prefetcher; + + /** Block size of the parent cache. */ + int blkSize; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** + * @} + */ + +public: + /** + * Builds and initializes this buffer. + * @param write_allocate If true, treat write misses the same as reads. + */ + BlockingBuffer(bool write_allocate) + : writeAllocate(write_allocate) + { + } + + /** + * Register statistics for this object. + * @param name The name of the parent cache. + */ + void regStats(const std::string &name); + + /** + * Called by the parent cache to set the back pointer. + * @param _cache A pointer to the parent cache. + */ + void setCache(BaseCache *_cache); + + void setPrefetcher(BasePrefetcher *_prefetcher); + + /** + * Handle a cache miss properly. Requests the bus and marks the cache as + * blocked. + * @param req The request that missed in the cache. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + */ + void handleMiss(Packet * &pkt, int blk_size, Tick time); + + /** + * Fetch the block for the given address and buffer the given target. + * @param addr The address to fetch. + * @param asid The address space of the address. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + * @param target The target for the fetch. + */ + MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target) + { + fatal("Unimplemented"); + } + + /** + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. + */ + Packet * getPacket(); + + /** + * Set the command to the given bus command. + * @param req The request to update. + * @param cmd The bus command to use. + */ + void setBusCmd(Packet * &pkt, Packet::Command cmd); + + /** + * Restore the original command in case of a bus transmission error. + * @param req The request to reset. + */ + void restoreOrigCmd(Packet * &pkt); + + /** + * Marks a request as in service (sent on the bus). This can have side + * effect since storage for no response commands is deallocated once they + * are successfully sent. + * @param req The request that was sent on the bus. + */ + void markInService(Packet * &pkt); + + /** + * Frees the resources of the request and unblock the cache. + * @param req The request that has been satisfied. + * @param time The time when the request is satisfied. + */ + void handleResponse(Packet * &pkt, Tick time); + + /** + * Removes all outstanding requests for a given thread number. If a request + * has been sent to the bus, this function removes all of its targets. + * @param req->getThreadNum()ber The thread number of the requests to squash. + */ + void squash(int threadNum); + + /** + * Return the current number of outstanding misses. + * @return the number of outstanding misses. + */ + int getMisses() + { + return miss.getNumTargets(); + } + + /** + * Searches for the supplied address in the miss "queue". + * @param addr The address to look for. + * @param asid The address space id. + * @return A pointer to miss if it matches. + */ + MSHR* findMSHR(Addr addr, int asid) + { + if (miss.addr == addr && miss.pkt) + return &miss; + return NULL; + } + + /** + * Searches for the supplied address in the write buffer. + * @param addr The address to look for. + * @param asid The address space id. + * @param writes List of pointers to the matching writes. + * @return True if there is a matching write. + */ + bool findWrites(Addr addr, int asid, std::vector<MSHR*>& writes) + { + if (wb.addr == addr && wb.pkt) { + writes.push_back(&wb); + return true; + } + return false; + } + + + + /** + * Perform a writeback of dirty data to the given address. + * @param addr The address to write to. + * @param asid The address space id. + * @param size The number of bytes to write. + * @param data The data to write, can be NULL. + * @param compressed True if the data is compressed. + */ + void doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed); + + /** + * Perform a writeback request. + * @param req The writeback request. + */ + void doWriteback(Packet * &pkt); + + /** + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. + */ + bool havePending() + { + return !miss.inService || !wb.inService; + } + + /** + * Add a target to the given MSHR. This assumes it is in the miss queue. + * @param mshr The mshr to add a target to. + * @param req The target to add. + */ + void addTarget(MSHR *mshr, Packet * &pkt) + { + fatal("Shouldn't call this on a blocking buffer."); + } + + /** + * Dummy implmentation. + */ + MSHR* allocateTargetList(Addr addr, int asid) + { + fatal("Unimplemented"); + } +}; + +#endif // __BLOCKING_BUFFER_HH__ diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc new file mode 100644 index 000000000..4a3dc1062 --- /dev/null +++ b/src/mem/cache/miss/miss_queue.cc @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Miss and writeback queue definitions. + */ + +#include "cpu/smt.hh" //for maxThreadsPerCPU +#include "mem/cache/base_cache.hh" +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" + +using namespace std; + +// simple constructor +/** + * @todo Remove the +16 from the write buffer constructor once we handle + * stalling on writebacks do to compression writes. + */ +MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, + bool write_allocate, bool prefetch_miss) + : mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs), + numTarget(numTargets), writeBuffers(write_buffers), + writeAllocate(write_allocate), order(0), prefetchMiss(prefetch_miss) +{ + noTargetMSHR = NULL; +} + +void +MissQueue::regStats(const string &name) +{ + Request temp_req((Addr) NULL, 4, 0); + Packet::Command temp_cmd = Packet::ReadReq; + Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + temp_pkt.allocate(); + + using namespace Stats; + + writebacks + .init(maxThreadsPerCPU) + .name(name + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; + + // MSHR hit statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshr_hits[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_hits") + .desc("number of " + cstr + " MSHR hits") + .flags(total | nozero | nonan) + ; + } + + demandMshrHits + .name(name + ".demand_mshr_hits") + .desc("number of demand (read+write) MSHR hits") + .flags(total) + ; + demandMshrHits = mshr_hits[Packet::ReadReq] + mshr_hits[Packet::WriteReq]; + + overallMshrHits + .name(name + ".overall_mshr_hits") + .desc("number of overall MSHR hits") + .flags(total) + ; + overallMshrHits = demandMshrHits + mshr_hits[Packet::SoftPFReq] + + mshr_hits[Packet::HardPFReq]; + + // MSHR miss statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshr_misses[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_misses") + .desc("number of " + cstr + " MSHR misses") + .flags(total | nozero | nonan) + ; + } + + demandMshrMisses + .name(name + ".demand_mshr_misses") + .desc("number of demand (read+write) MSHR misses") + .flags(total) + ; + demandMshrMisses = mshr_misses[Packet::ReadReq] + mshr_misses[Packet::WriteReq]; + + overallMshrMisses + .name(name + ".overall_mshr_misses") + .desc("number of overall MSHR misses") + .flags(total) + ; + overallMshrMisses = demandMshrMisses + mshr_misses[Packet::SoftPFReq] + + mshr_misses[Packet::HardPFReq]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshr_miss_latency[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_miss_latency") + .desc("number of " + cstr + " MSHR miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMshrMissLatency + .name(name + ".demand_mshr_miss_latency") + .desc("number of demand (read+write) MSHR miss cycles") + .flags(total) + ; + demandMshrMissLatency = mshr_miss_latency[Packet::ReadReq] + + mshr_miss_latency[Packet::WriteReq]; + + overallMshrMissLatency + .name(name + ".overall_mshr_miss_latency") + .desc("number of overall MSHR miss cycles") + .flags(total) + ; + overallMshrMissLatency = demandMshrMissLatency + + mshr_miss_latency[Packet::SoftPFReq] + mshr_miss_latency[Packet::HardPFReq]; + + // MSHR uncacheable statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshr_uncacheable[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_uncacheable") + .desc("number of " + cstr + " MSHR uncacheable") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheable + .name(name + ".overall_mshr_uncacheable_misses") + .desc("number of overall MSHR uncacheable misses") + .flags(total) + ; + overallMshrUncacheable = mshr_uncacheable[Packet::ReadReq] + + mshr_uncacheable[Packet::WriteReq] + mshr_uncacheable[Packet::SoftPFReq] + + mshr_uncacheable[Packet::HardPFReq]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshr_uncacheable_lat[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_uncacheable_latency") + .desc("number of " + cstr + " MSHR uncacheable cycles") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheableLatency + .name(name + ".overall_mshr_uncacheable_latency") + .desc("number of overall MSHR uncacheable cycles") + .flags(total) + ; + overallMshrUncacheableLatency = mshr_uncacheable_lat[Packet::ReadReq] + + mshr_uncacheable_lat[Packet::WriteReq] + + mshr_uncacheable_lat[Packet::SoftPFReq] + + mshr_uncacheable_lat[Packet::HardPFReq]; + +#if 0 + // MSHR access formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshrAccesses[access_idx] + .name(name + "." + cstr + "_mshr_accesses") + .desc("number of " + cstr + " mshr accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + mshrAccesses[access_idx] = + mshr_hits[access_idx] + mshr_misses[access_idx] + + mshr_uncacheable[access_idx]; + } + + demandMshrAccesses + .name(name + ".demand_mshr_accesses") + .desc("number of demand (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + demandMshrAccesses = demandMshrHits + demandMshrMisses; + + overallMshrAccesses + .name(name + ".overall_mshr_accesses") + .desc("number of overall (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + overallMshrAccesses = overallMshrHits + overallMshrMisses + + overallMshrUncacheable; +#endif + + // MSHR miss rate formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + mshrMissRate[access_idx] + .name(name + "." + cstr + "_mshr_miss_rate") + .desc("mshr miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + mshrMissRate[access_idx] = + mshr_misses[access_idx] / cache->accesses[access_idx]; + } + + demandMshrMissRate + .name(name + ".demand_mshr_miss_rate") + .desc("mshr miss rate for demand accesses") + .flags(total) + ; + demandMshrMissRate = demandMshrMisses / cache->demandAccesses; + + overallMshrMissRate + .name(name + ".overall_mshr_miss_rate") + .desc("mshr miss rate for overall accesses") + .flags(total) + ; + overallMshrMissRate = overallMshrMisses / cache->overallAccesses; + + // mshrMiss latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + avgMshrMissLatency[access_idx] + .name(name + "." + cstr + "_avg_mshr_miss_latency") + .desc("average " + cstr + " mshr miss latency") + .flags(total | nozero | nonan) + ; + + avgMshrMissLatency[access_idx] = + mshr_miss_latency[access_idx] / mshr_misses[access_idx]; + } + + demandAvgMshrMissLatency + .name(name + ".demand_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; + + overallAvgMshrMissLatency + .name(name + ".overall_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; + + // mshrUncacheable latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); + + avgMshrUncacheableLatency[access_idx] + .name(name + "." + cstr + "_avg_mshr_uncacheable_latency") + .desc("average " + cstr + " mshr uncacheable latency") + .flags(total | nozero | nonan) + ; + + avgMshrUncacheableLatency[access_idx] = + mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; + } + + overallAvgMshrUncacheableLatency + .name(name + ".overall_avg_mshr_uncacheable_latency") + .desc("average overall mshr uncacheable latency") + .flags(total) + ; + overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; + + mshr_cap_events + .init(maxThreadsPerCPU) + .name(name + ".mshr_cap_events") + .desc("number of times MSHR cap was activated") + .flags(total) + ; + + //software prefetching stats + soft_prefetch_mshr_full + .init(maxThreadsPerCPU) + .name(name + ".soft_prefetch_mshr_full") + .desc("number of mshr full events for SW prefetching instrutions") + .flags(total) + ; + + mshr_no_allocate_misses + .name(name +".no_allocate_misses") + .desc("Number of misses that were no-allocate") + ; + +} + +void +MissQueue::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +MissQueue::setPrefetcher(BasePrefetcher *_prefetcher) +{ + prefetcher = _prefetcher; +} + +MSHR* +MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) +{ + MSHR* mshr = mq.allocate(pkt, blkSize); + mshr->order = order++; + if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { + // Mark this as a cache line fill + mshr->pkt->flags |= CACHE_LINE_FILL; + } + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + if (pkt->cmd != Packet::HardPFReq) { + //If we need to request the bus (not on HW prefetch), do so + cache->setMasterRequest(Request_MSHR, time); + } + return mshr; +} + + +MSHR* +MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) +{ + MSHR* mshr = wb.allocate(pkt,blkSize); + mshr->order = order++; + +//REMOVING COMPRESSION FOR NOW +#if 0 + if (pkt->isCompressed()) { + mshr->pkt->deleteData(); + mshr->pkt->actualSize = pkt->actualSize; + mshr->pkt->data = new uint8_t[pkt->actualSize]; + memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); + } else { +#endif + memcpy(mshr->pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize()); + //{ + + if (wb.isFull()) { + cache->setBlocked(Blocked_NoWBBuffers); + } + + cache->setMasterRequest(Request_WB, time); + + return mshr; +} + + +/** + * @todo Remove SW prefetches on mshr hits. + */ +void +MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) +{ +// if (!cache->isTopLevel()) + if (prefetchMiss) prefetcher->handleMiss(pkt, time); + + int size = blkSize; + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); + MSHR* mshr = NULL; + if (!pkt->req->isUncacheable()) { + mshr = mq.findMatch(blkAddr, pkt->req->getAsid()); + if (mshr) { + //@todo remove hw_pf here + mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + if (mshr->threadNum != pkt->req->getThreadNum()) { + mshr->threadNum = -1; + } + mq.allocateTarget(mshr, pkt); + if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { + //We are adding an allocate after a no-allocate + mshr->pkt->flags &= ~NO_ALLOCATE; + } + if (mshr->getNumTargets() == numTarget) { + noTargetMSHR = mshr; + cache->setBlocked(Blocked_NoTargets); + mq.moveToFront(mshr); + } + return; + } + if (pkt->isNoAllocate()) { + //Count no-allocate requests differently + mshr_no_allocate_misses++; + } + else { + mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + } + } else { + //Count uncacheable accesses + mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + size = pkt->getSize(); + } + if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || + !pkt->needsResponse())) { + /** + * @todo Add write merging here. + */ + mshr = allocateWrite(pkt, blkSize, time); + return; + } + + mshr = allocateMiss(pkt, blkSize, time); +} + +MSHR* +MissQueue::fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target) +{ + Addr blkAddr = addr & ~(Addr)(blk_size - 1); + assert(mq.findMatch(addr, asid) == NULL); + MSHR *mshr = mq.allocateFetch(blkAddr, asid, blk_size, target); + mshr->order = order++; + mshr->pkt->flags |= CACHE_LINE_FILL; + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + cache->setMasterRequest(Request_MSHR, time); + return mshr; +} + +Packet * +MissQueue::getPacket() +{ + Packet * pkt = mq.getReq(); + if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt || + pkt->time > curTick) && wb.havePending()) { + pkt = wb.getReq(); + // Need to search for earlier miss. + MSHR *mshr = mq.findPending(pkt); + if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) { + // Service misses in order until conflict is cleared. + return mq.getReq(); + } + } + if (pkt) { + MSHR* mshr = wb.findPending(pkt); + if (mshr /*&& mshr->order < pkt->senderState->order*/) { + // The only way this happens is if we are + // doing a write and we didn't have permissions + // then subsequently saw a writeback(owned got evicted) + // We need to make sure to perform the writeback first + // To preserve the dirty data, then we can issue the write + return wb.getReq(); + } + } + else if (!mq.isFull()){ + //If we have a miss queue slot, we can try a prefetch + pkt = prefetcher->getPacket(); + if (pkt) { + //Update statistic on number of prefetches issued (hwpf_mshr_misses) + mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + //It will request the bus for the future, but should clear that immedieatley + allocateMiss(pkt, pkt->getSize(), curTick); + pkt = mq.getReq(); + assert(pkt); //We should get back a req b/c we just put one in + } + } + return pkt; +} + +void +MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) +{ + assert(pkt->senderState != 0); + MSHR * mshr = (MSHR*)pkt->senderState; + mshr->originalCmd = pkt->cmd; + if (pkt->isCacheFill() || pkt->isNoAllocate()) + pkt->cmd = cmd; +} + +void +MissQueue::restoreOrigCmd(Packet * &pkt) +{ + pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd; +} + +void +MissQueue::markInService(Packet * &pkt) +{ + assert(pkt->senderState != 0); + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + /** + * @todo Should include MSHRQueue pointer in MSHR to select the correct + * one. + */ + if ((!pkt->isCacheFill() && pkt->isWrite())) { + // Forwarding a write/ writeback, don't need to change + // the command + unblock = wb.isFull(); + wb.markInService((MSHR*)pkt->senderState); + if (!wb.havePending()){ + cache->clearMasterRequest(Request_WB); + } + if (unblock) { + // Do we really unblock? + unblock = !wb.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mq.isFull(); + mq.markInService((MSHR*)pkt->senderState); + if (!mq.havePending()){ + cache->clearMasterRequest(Request_MSHR); + } + if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { + DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", + cache->name()); + //Also clear pending if need be + if (!prefetcher->havePending()) + { + cache->clearMasterRequest(Request_PF); + } + } + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + if (unblock) { + cache->clearBlocked(cause); + } +} + + +void +MissQueue::handleResponse(Packet * &pkt, Tick time) +{ + MSHR* mshr = (MSHR*)pkt->senderState; + if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { + DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n", + cache->name()); + } +#ifndef NDEBUG + int num_targets = mshr->getNumTargets(); +#endif + + bool unblock = false; + bool unblock_target = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (pkt->isCacheFill() && !pkt->isNoAllocate()) { + mshr_miss_latency[mshr->originalCmd][pkt->req->getThreadNum()] += + curTick - pkt->time; + // targets were handled in the cache tags + if (mshr == noTargetMSHR) { + // we always clear at least one target + unblock_target = true; + cause = Blocked_NoTargets; + noTargetMSHR = NULL; + } + + if (mshr->hasTargets()) { + // Didn't satisfy all the targets, need to resend + Packet::Command cmd = mshr->getTarget()->cmd; + mq.markPending(mshr, cmd); + mshr->order = order++; + cache->setMasterRequest(Request_MSHR, time); + } + else { + unblock = mq.isFull(); + mq.deallocate(mshr); + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + } else { + if (pkt->req->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd][pkt->req->getThreadNum()] += + curTick - pkt->time; + } + if (mshr->hasTargets() && pkt->req->isUncacheable()) { + // Should only have 1 target if we had any + assert(num_targets == 1); + Packet * target = mshr->getTarget(); + mshr->popTarget(); + if (pkt->isRead()) { + memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), + target->getSize()); + } + cache->respond(target, time); + assert(!mshr->hasTargets()); + } + else if (mshr->hasTargets()) { + //Must be a no_allocate with possibly more than one target + assert(mshr->pkt->isNoAllocate()); + while (mshr->hasTargets()) { + Packet * target = mshr->getTarget(); + mshr->popTarget(); + if (pkt->isRead()) { + memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), + target->getSize()); + } + cache->respond(target, time); + } + } + + if (pkt->isWrite()) { + // If the wrtie buffer is full, we might unblock now + unblock = wb.isFull(); + wb.deallocate(mshr); + if (unblock) { + // Did we really unblock? + unblock = !wb.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mq.isFull(); + mq.deallocate(mshr); + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + } + if (unblock || unblock_target) { + cache->clearBlocked(cause); + } +} + +void +MissQueue::squash(int threadNum) +{ + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) { + noTargetMSHR = NULL; + unblock = true; + cause = Blocked_NoTargets; + } + if (mq.isFull()) { + unblock = true; + cause = Blocked_NoMSHRs; + } + mq.squash(threadNum); + if (!mq.havePending()) { + cache->clearMasterRequest(Request_MSHR); + } + if (unblock && !mq.isFull()) { + cache->clearBlocked(cause); + } + +} + +MSHR* +MissQueue::findMSHR(Addr addr, int asid) const +{ + return mq.findMatch(addr,asid); +} + +bool +MissQueue::findWrites(Addr addr, int asid, vector<MSHR*> &writes) const +{ + return wb.findMatches(addr,asid,writes); +} + +void +MissQueue::doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed) +{ + // Generate request + Request * req = new Request(addr, size, 0); + Packet * pkt = new Packet(req, Packet::Writeback, -1); + pkt->allocate(); + if (data) { + memcpy(pkt->getPtr<uint8_t>(), data, size); + } + + if (compressed) { + pkt->flags |= COMPRESSED; + } + + ///All writebacks charged to same thread @todo figure this out + writebacks[pkt->req->getThreadNum()]++; + + allocateWrite(pkt, 0, curTick); +} + + +void +MissQueue::doWriteback(Packet * &pkt) +{ + writebacks[pkt->req->getThreadNum()]++; + allocateWrite(pkt, 0, curTick); +} + + +MSHR* +MissQueue::allocateTargetList(Addr addr, int asid) +{ + MSHR* mshr = mq.allocateTargetList(addr, asid, blkSize); + mshr->pkt->flags |= CACHE_LINE_FILL; + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + return mshr; +} + +bool +MissQueue::havePending() +{ + return mq.havePending() || wb.havePending() || prefetcher->havePending(); +} diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh new file mode 100644 index 000000000..b88b7038c --- /dev/null +++ b/src/mem/cache/miss/miss_queue.hh @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Miss and writeback queue declarations. + */ + +#ifndef __MISS_QUEUE_HH__ +#define __MISS_QUEUE_HH__ + +#include <vector> + +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "base/statistics.hh" + +class BaseCache; +class BasePrefetcher; +/** + * Manages cache misses and writebacks. Contains MSHRs to store miss data + * and the writebuffer for writes/writebacks. + * @todo need to handle data on writes better (encapsulate). + * @todo need to make replacements/writebacks happen in Cache::access + */ +class MissQueue +{ + protected: + /** The MSHRs. */ + MSHRQueue mq; + /** Write Buffer. */ + MSHRQueue wb; + + // PARAMTERS + + /** The number of MSHRs in the miss queue. */ + const int numMSHR; + /** The number of targets for each MSHR. */ + const int numTarget; + /** The number of write buffers. */ + const int writeBuffers; + /** True if the cache should allocate on a write miss. */ + const bool writeAllocate; + /** Pointer to the parent cache. */ + BaseCache* cache; + + /** The Prefetcher */ + BasePrefetcher *prefetcher; + + /** The block size of the parent cache. */ + int blkSize; + + /** Increasing order number assigned to each incoming request. */ + uint64_t order; + + bool prefetchMiss; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** Number of misses that hit in the MSHRs per command and thread. */ + Stats::Vector<> mshr_hits[NUM_MEM_CMDS]; + /** Demand misses that hit in the MSHRs. */ + Stats::Formula demandMshrHits; + /** Total number of misses that hit in the MSHRs. */ + Stats::Formula overallMshrHits; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_misses[NUM_MEM_CMDS]; + /** Demand misses that miss in the MSHRs. */ + Stats::Formula demandMshrMisses; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrMisses; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_uncacheable[NUM_MEM_CMDS]; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrUncacheable; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_miss_latency[NUM_MEM_CMDS]; + /** Total cycle latency of demand MSHR misses. */ + Stats::Formula demandMshrMissLatency; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrMissLatency; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_uncacheable_lat[NUM_MEM_CMDS]; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrUncacheableLatency; + + /** The total number of MSHR accesses per command and thread. */ + Stats::Formula mshrAccesses[NUM_MEM_CMDS]; + /** The total number of demand MSHR accesses. */ + Stats::Formula demandMshrAccesses; + /** The total number of MSHR accesses. */ + Stats::Formula overallMshrAccesses; + + /** The miss rate in the MSHRs pre command and thread. */ + Stats::Formula mshrMissRate[NUM_MEM_CMDS]; + /** The demand miss rate in the MSHRs. */ + Stats::Formula demandMshrMissRate; + /** The overall miss rate in the MSHRs. */ + Stats::Formula overallMshrMissRate; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrMissLatency[NUM_MEM_CMDS]; + /** The average latency of a demand MSHR miss. */ + Stats::Formula demandAvgMshrMissLatency; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrMissLatency; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrUncacheableLatency[NUM_MEM_CMDS]; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrUncacheableLatency; + + /** The number of times a thread hit its MSHR cap. */ + Stats::Vector<> mshr_cap_events; + /** The number of times software prefetches caused the MSHR to block. */ + Stats::Vector<> soft_prefetch_mshr_full; + + Stats::Scalar<> mshr_no_allocate_misses; + + /** + * @} + */ + + private: + /** Pointer to the MSHR that has no targets. */ + MSHR* noTargetMSHR; + + /** + * Allocate a new MSHR to handle the provided miss. + * @param req The miss to buffer. + * @param size The number of bytes to fetch. + * @param time The time the miss occurs. + * @return A pointer to the new MSHR. + */ + MSHR* allocateMiss(Packet * &pkt, int size, Tick time); + + /** + * Allocate a new WriteBuffer to handle the provided write. + * @param req The write to handle. + * @param size The number of bytes to write. + * @param time The time the write occurs. + * @return A pointer to the new write buffer. + */ + MSHR* allocateWrite(Packet * &pkt, int size, Tick time); + + public: + /** + * Simple Constructor. Initializes all needed internal storage and sets + * parameters. + * @param numMSHRs The number of outstanding misses to handle. + * @param numTargets The number of outstanding targets to each miss. + * @param write_buffers The number of outstanding writes to handle. + * @param write_allocate If true, treat write misses the same as reads. + */ + MissQueue(int numMSHRs, int numTargets, int write_buffers, + bool write_allocate, bool prefetch_miss); + + /** + * Deletes all allocated internal storage. + */ + ~MissQueue(); + + /** + * Register statistics for this object. + * @param name The name of the parent cache. + */ + void regStats(const std::string &name); + + /** + * Called by the parent cache to set the back pointer. + * @param _cache A pointer to the parent cache. + */ + void setCache(BaseCache *_cache); + + void setPrefetcher(BasePrefetcher *_prefetcher); + + /** + * Handle a cache miss properly. Either allocate an MSHR for the request, + * or forward it through the write buffer. + * @param req The request that missed in the cache. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + */ + void handleMiss(Packet * &pkt, int blk_size, Tick time); + + /** + * Fetch the block for the given address and buffer the given target. + * @param addr The address to fetch. + * @param asid The address space of the address. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + * @param target The target for the fetch. + */ + MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target); + + /** + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. + */ + Packet * getPacket(); + + /** + * Set the command to the given bus command. + * @param req The request to update. + * @param cmd The bus command to use. + */ + void setBusCmd(Packet * &pkt, Packet::Command cmd); + + /** + * Restore the original command in case of a bus transmission error. + * @param req The request to reset. + */ + void restoreOrigCmd(Packet * &pkt); + + /** + * Marks a request as in service (sent on the bus). This can have side + * effect since storage for no response commands is deallocated once they + * are successfully sent. + * @param req The request that was sent on the bus. + */ + void markInService(Packet * &pkt); + + /** + * Collect statistics and free resources of a satisfied request. + * @param req The request that has been satisfied. + * @param time The time when the request is satisfied. + */ + void handleResponse(Packet * &pkt, Tick time); + + /** + * Removes all outstanding requests for a given thread number. If a request + * has been sent to the bus, this function removes all of its targets. + * @param req->getThreadNum()ber The thread number of the requests to squash. + */ + void squash(int threadNum); + + /** + * Return the current number of outstanding misses. + * @return the number of outstanding misses. + */ + int getMisses() + { + return mq.getAllocatedTargets(); + } + + /** + * Searches for the supplied address in the miss queue. + * @param addr The address to look for. + * @param asid The address space id. + * @return The MSHR that contains the address, NULL if not found. + * @warning Currently only searches the miss queue. If non write allocate + * might need to search the write buffer for coherence. + */ + MSHR* findMSHR(Addr addr, int asid) const; + + /** + * Searches for the supplied address in the write buffer. + * @param addr The address to look for. + * @param asid The address space id. + * @param writes The list of writes that match the address. + * @return True if any writes are found + */ + bool findWrites(Addr addr, int asid, std::vector<MSHR*>& writes) const; + + /** + * Perform a writeback of dirty data to the given address. + * @param addr The address to write to. + * @param asid The address space id. + * @param xc The execution context of the address space. + * @param size The number of bytes to write. + * @param data The data to write, can be NULL. + * @param compressed True if the data is compressed. + */ + void doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed); + + /** + * Perform the given writeback request. + * @param req The writeback request. + */ + void doWriteback(Packet * &pkt); + + /** + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. + */ + bool havePending(); + + /** + * Add a target to the given MSHR. This assumes it is in the miss queue. + * @param mshr The mshr to add a target to. + * @param req The target to add. + */ + void addTarget(MSHR *mshr, Packet * &pkt) + { + mq.allocateTarget(mshr, pkt); + } + + /** + * Allocate a MSHR to hold a list of targets to a block involved in a copy. + * If the block is marked done then the MSHR already holds the data to + * fill the block. Otherwise the block needs to be fetched. + * @param addr The address to buffer. + * @param asid The address space ID. + * @return A pointer to the allocated MSHR. + */ + MSHR* allocateTargetList(Addr addr, int asid); + +}; + +#endif //__MISS_QUEUE_HH__ diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc new file mode 100644 index 000000000..db2f40c56 --- /dev/null +++ b/src/mem/cache/miss/mshr.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + */ + +/** + * @file + * Miss Status and Handling Register (MSHR) definitions. + */ + +#include <assert.h> +#include <string> +#include <vector> + +#include "mem/cache/miss/mshr.hh" +#include "sim/root.hh" // for curTick +#include "sim/host.hh" +#include "base/misc.hh" +#include "mem/cache/cache.hh" + +using namespace std; + +MSHR::MSHR() +{ + inService = false; + ntargets = 0; + threadNum = -1; +} + +void +MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, + Packet * &target) +{ + addr = _addr; + if (target) + { + //Have a request, just use it + pkt = new Packet(target->req, cmd, Packet::Broadcast, size); + pkt->time = curTick; + pkt->allocate(); + pkt->senderState = (Packet::SenderState *)this; + allocateTarget(target); + } + else + { + //need a request first + Request * req = new Request(); + req->setPhys(addr, size, 0); + //Thread context?? + pkt = new Packet(req, cmd, Packet::Broadcast, size); + pkt->time = curTick; + pkt->allocate(); + pkt->senderState = (Packet::SenderState *)this; + } +} + +// Since we aren't sure if data is being used, don't copy here. +/** + * @todo When we have a "global" data flag, might want to copy data here. + */ +void +MSHR::allocateAsBuffer(Packet * &target) +{ + addr = target->getAddr(); + asid = target->req->getAsid(); + threadNum = target->req->getThreadNum(); + pkt = new Packet(target->req, target->cmd, -1); + pkt->allocate(); + pkt->senderState = (Packet::SenderState*)this; + pkt->time = curTick; +} + +void +MSHR::deallocate() +{ + assert(targets.empty()); + assert(ntargets == 0); + pkt = NULL; + inService = false; + allocIter = NULL; + readyIter = NULL; +} + +/* + * Adds a target to an MSHR + */ +void +MSHR::allocateTarget(Packet * &target) +{ + //If we append an invalidate and we issued a read to the bus, + //but now have some pending writes, we need to move + //the invalidate to before the first non-read + if (inService && pkt->isRead() && target->isInvalidate()) { + std::list<Packet *> temp; + + while (!targets.empty()) { + if (!targets.front()->isRead()) break; + //Place on top of temp stack + temp.push_front(targets.front()); + //Remove from targets + targets.pop_front(); + } + + //Now that we have all the reads off until first non-read, we can + //place the invalidate on + targets.push_front(target); + + //Now we pop off the temp_stack and put them back + while (!temp.empty()) { + targets.push_front(temp.front()); + temp.pop_front(); + } + } + else { + targets.push_back(target); + } + + ++ntargets; + assert(targets.size() == ntargets); + /** + * @todo really prioritize the target commands. + */ + + if (!inService && target->isWrite()) { + pkt->cmd = Packet::WriteReq; + } +} + + + +void +MSHR::dump() +{ + ccprintf(cerr, + "inService: %d thread: %d\n" + "Addr: %x asid: %d ntargets %d\n" + "Targets:\n", + inService, threadNum, addr, asid, ntargets); + + TargetListIterator tar_it = targets.begin(); + for (int i = 0; i < ntargets; i++) { + assert(tar_it != targets.end()); + + ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", + i, (*tar_it)->getAddr(), (*tar_it)->cmdToIndex()); + + tar_it++; + } + ccprintf(cerr, "\n"); +} + +MSHR::~MSHR() +{ + if (pkt) + pkt = NULL; +} diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh new file mode 100644 index 000000000..167aa26cd --- /dev/null +++ b/src/mem/cache/miss/mshr.hh @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Miss Status and Handling Register (MSHR) declaration. + */ + +#ifndef __MSHR_HH__ +#define __MSHR_HH__ + +#include "mem/packet.hh" +#include <list> +#include <deque> + +class MSHR; + +/** + * Miss Status and handling Register. This class keeps all the information + * needed to handle a cache miss including a list of target requests. + */ +class MSHR { + public: + /** Defines the Data structure of the MSHR targetlist. */ + typedef std::list<Packet *> TargetList; + /** Target list iterator. */ + typedef std::list<Packet *>::iterator TargetListIterator; + /** A list of MSHRs. */ + typedef std::list<MSHR *> List; + /** MSHR list iterator. */ + typedef List::iterator Iterator; + /** MSHR list const_iterator. */ + typedef List::const_iterator ConstIterator; + + /** Address of the miss. */ + Addr addr; + /** Adress space id of the miss. */ + short asid; + /** True if the request has been sent to the bus. */ + bool inService; + /** Thread number of the miss. */ + int threadNum; + /** The request that is forwarded to the next level of the hierarchy. */ + Packet * pkt; + /** The number of currently allocated targets. */ + short ntargets; + /** The original requesting command. */ + Packet::Command originalCmd; + /** Order number of assigned by the miss queue. */ + uint64_t order; + + /** + * Pointer to this MSHR on the ready list. + * @sa MissQueue, MSHRQueue::readyList + */ + Iterator readyIter; + /** + * Pointer to this MSHR on the allocated list. + * @sa MissQueue, MSHRQueue::allocatedList + */ + Iterator allocIter; + +private: + /** List of all requests that match the address */ + TargetList targets; + +public: + /** + * Allocate a miss to this MSHR. + * @param cmd The requesting command. + * @param addr The address of the miss. + * @param asid The address space id of the miss. + * @param size The number of bytes to request. + * @param req The original miss. + */ + void allocate(Packet::Command cmd, Addr addr, int asid, int size, + Packet * &pkt); + + /** + * Allocate this MSHR as a buffer for the given request. + * @param target The memory request to buffer. + */ + void allocateAsBuffer(Packet * &target); + + /** + * Mark this MSHR as free. + */ + void deallocate(); + + /** + * Add a request to the list of targets. + * @param target The target. + */ + void allocateTarget(Packet * &target); + + /** A simple constructor. */ + MSHR(); + /** A simple destructor. */ + ~MSHR(); + + /** + * Returns the current number of allocated targets. + * @return The current number of allocated targets. + */ + int getNumTargets() + { + return(ntargets); + } + + /** + * Returns a pointer to the target list. + * @return a pointer to the target list. + */ + TargetList* getTargetList() + { + return &targets; + } + + /** + * Returns a reference to the first target. + * @return A pointer to the first target. + */ + Packet * getTarget() + { + return targets.front(); + } + + /** + * Pop first target. + */ + void popTarget() + { + --ntargets; + targets.pop_front(); + } + + /** + * Returns true if there are targets left. + * @return true if there are targets + */ + bool hasTargets() + { + return !targets.empty(); + } + + /** + * Prints the contents of this MSHR to stderr. + */ + void dump(); +}; + +#endif //__MSHR_HH__ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc new file mode 100644 index 000000000..6516a99f8 --- /dev/null +++ b/src/mem/cache/miss/mshr_queue.cc @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** @file + * Definition of the MSHRQueue. + */ + +#include "mem/cache/miss/mshr_queue.hh" +#include "sim/eventq.hh" + +using namespace std; + +MSHRQueue::MSHRQueue(int num_mshrs, int reserve) + : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve) +{ + allocated = 0; + inServiceMSHRs = 0; + allocatedTargets = 0; + registers = new MSHR[numMSHRs]; + for (int i = 0; i < numMSHRs; ++i) { + freeList.push_back(®isters[i]); + } +} + +MSHRQueue::~MSHRQueue() +{ + delete [] registers; +} + +MSHR* +MSHRQueue::findMatch(Addr addr, int asid) const +{ + MSHR::ConstIterator i = allocatedList.begin(); + MSHR::ConstIterator end = allocatedList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr == addr) { + return mshr; + } + } + return NULL; +} + +bool +MSHRQueue::findMatches(Addr addr, int asid, vector<MSHR*>& matches) const +{ + // Need an empty vector + assert(matches.empty()); + bool retval = false; + MSHR::ConstIterator i = allocatedList.begin(); + MSHR::ConstIterator end = allocatedList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr == addr) { + retval = true; + matches.push_back(mshr); + } + } + return retval; + +} + +MSHR* +MSHRQueue::findPending(Packet * &pkt) const +{ + MSHR::ConstIterator i = pendingList.begin(); + MSHR::ConstIterator end = pendingList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr < pkt->getAddr()) { + if (mshr->addr + mshr->pkt->getSize() > pkt->getAddr()) { + return mshr; + } + } else { + if (pkt->getAddr() + pkt->getSize() > mshr->addr) { + return mshr; + } + } + + //need to check destination address for copies. + //TEMP NOT DOING COPIES +#if 0 + if (mshr->pkt->cmd == Copy) { + Addr dest = mshr->pkt->dest; + if (dest < pkt->addr) { + if (dest + mshr->pkt->size > pkt->addr) { + return mshr; + } + } else { + if (pkt->addr + pkt->size > dest) { + return mshr; + } + } + } +#endif + } + return NULL; +} + +MSHR* +MSHRQueue::allocate(Packet * &pkt, int size) +{ + Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1); + MSHR *mshr = freeList.front(); + assert(mshr->getNumTargets() == 0); + freeList.pop_front(); + + if (!pkt->needsResponse()) { + mshr->allocateAsBuffer(pkt); + } else { + assert(size !=0); + mshr->allocate(pkt->cmd, aligned_addr, pkt->req->getAsid(), size, pkt); + allocatedTargets += 1; + } + mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); + mshr->readyIter = pendingList.insert(pendingList.end(), mshr); + + allocated += 1; + return mshr; +} + +MSHR* +MSHRQueue::allocateFetch(Addr addr, int asid, int size, Packet * &target) +{ + MSHR *mshr = freeList.front(); + assert(mshr->getNumTargets() == 0); + freeList.pop_front(); + mshr->allocate(Packet::ReadReq, addr, asid, size, target); + mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); + mshr->readyIter = pendingList.insert(pendingList.end(), mshr); + + allocated += 1; + return mshr; +} + +MSHR* +MSHRQueue::allocateTargetList(Addr addr, int asid, int size) +{ + MSHR *mshr = freeList.front(); + assert(mshr->getNumTargets() == 0); + freeList.pop_front(); + Packet * dummy; + mshr->allocate(Packet::ReadReq, addr, asid, size, dummy); + mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); + mshr->inService = true; + ++inServiceMSHRs; + ++allocated; + return mshr; +} + + +void +MSHRQueue::deallocate(MSHR* mshr) +{ + deallocateOne(mshr); +} + +MSHR::Iterator +MSHRQueue::deallocateOne(MSHR* mshr) +{ + MSHR::Iterator retval = allocatedList.erase(mshr->allocIter); + freeList.push_front(mshr); + allocated--; + allocatedTargets -= mshr->getNumTargets(); + if (mshr->inService) { + inServiceMSHRs--; + } else { + pendingList.erase(mshr->readyIter); + } + mshr->deallocate(); + return retval; +} + +void +MSHRQueue::moveToFront(MSHR *mshr) +{ + if (!mshr->inService) { + assert(mshr == *(mshr->readyIter)); + pendingList.erase(mshr->readyIter); + mshr->readyIter = pendingList.insert(pendingList.begin(), mshr); + } +} + +void +MSHRQueue::markInService(MSHR* mshr) +{ + //assert(mshr == pendingList.front()); + if (!mshr->pkt->needsResponse()) { + assert(mshr->getNumTargets() == 0); + deallocate(mshr); + return; + } + mshr->inService = true; + pendingList.erase(mshr->readyIter); + mshr->readyIter = NULL; + inServiceMSHRs += 1; + //pendingList.pop_front(); +} + +void +MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd) +{ + assert(mshr->readyIter == NULL); + mshr->pkt->cmd = cmd; + mshr->pkt->flags &= ~SATISFIED; + mshr->inService = false; + --inServiceMSHRs; + /** + * @ todo might want to add rerequests to front of pending list for + * performance. + */ + mshr->readyIter = pendingList.insert(pendingList.end(), mshr); +} + +void +MSHRQueue::squash(int threadNum) +{ + MSHR::Iterator i = allocatedList.begin(); + MSHR::Iterator end = allocatedList.end(); + for (; i != end;) { + MSHR *mshr = *i; + if (mshr->threadNum == threadNum) { + while (mshr->hasTargets()) { + Packet * target = mshr->getTarget(); + mshr->popTarget(); + + assert(target->req->getThreadNum() == threadNum); + target = NULL; + } + assert(!mshr->hasTargets()); + assert(mshr->ntargets==0); + if (!mshr->inService) { + i = deallocateOne(mshr); + } else { + //mshr->pkt->flags &= ~CACHE_LINE_FILL; + ++i; + } + } else { + ++i; + } + } +} diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh new file mode 100644 index 000000000..a67f1b9a6 --- /dev/null +++ b/src/mem/cache/miss/mshr_queue.hh @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** @file + * Declaration of a structure to manage MSHRs. + */ + +#ifndef __MSHR_QUEUE_HH__ +#define __MSHR_QUEUE_HH__ + +#include <vector> +#include "mem/cache/miss/mshr.hh" + +/** + * A Class for maintaining a list of pending and allocated memory requests. + */ +class MSHRQueue { + private: + /** MSHR storage. */ + MSHR* registers; + /** Holds pointers to all allocated MSHRs. */ + MSHR::List allocatedList; + /** Holds pointers to MSHRs that haven't been sent to the bus. */ + MSHR::List pendingList; + /** Holds non allocated MSHRs. */ + MSHR::List freeList; + + // Parameters + /** + * The total number of MSHRs in this queue. This number is set as the + * number of MSHRs requested plus (numReserve - 1). This allows for + * the same number of effective MSHRs while still maintaining the reserve. + */ + const int numMSHRs; + + /** + * The number of MSHRs to hold in reserve. This is needed because copy + * operations can allocate upto 4 MSHRs at one time. + */ + const int numReserve; + + public: + /** The number of allocated MSHRs. */ + int allocated; + /** The number of MSHRs that have been forwarded to the bus. */ + int inServiceMSHRs; + /** The number of targets waiting for response. */ + int allocatedTargets; + + /** + * Create a queue with a given number of MSHRs. + * @param num_mshrs The number of MSHRs in this queue. + * @param reserve The minimum number of MSHRs needed to satisfy any access. + */ + MSHRQueue(int num_mshrs, int reserve = 1); + + /** Destructor */ + ~MSHRQueue(); + + /** + * Find the first MSHR that matches the provide address and asid. + * @param addr The address to find. + * @param asid The address space id. + * @return Pointer to the matching MSHR, null if not found. + */ + MSHR* findMatch(Addr addr, int asid) const; + + /** + * Find and return all the matching MSHRs in the provided vector. + * @param addr The address to find. + * @param asid The address space ID. + * @param matches The vector to return pointers to the matching MSHRs. + * @return True if any matches are found, false otherwise. + * @todo Typedef the vector?? + */ + bool findMatches(Addr addr, int asid, std::vector<MSHR*>& matches) const; + + /** + * Find any pending requests that overlap the given request. + * @param req The request to find. + * @return A pointer to the earliest matching MSHR. + */ + MSHR* findPending(Packet * &pkt) const; + + /** + * Allocates a new MSHR for the request and size. This places the request + * as the first target in the MSHR. + * @param req The request to handle. + * @param size The number in bytes to fetch from memory. + * @return The a pointer to the MSHR allocated. + * + * @pre There are free MSHRs. + */ + MSHR* allocate(Packet * &pkt, int size = 0); + + /** + * Allocate a read request for the given address, and places the given + * target on the target list. + * @param addr The address to fetch. + * @param asid The address space for the fetch. + * @param size The number of bytes to request. + * @param target The first target for the request. + * @return Pointer to the new MSHR. + */ + MSHR* allocateFetch(Addr addr, int asid, int size, Packet * &target); + + /** + * Allocate a target list for the given address. + * @param addr The address to fetch. + * @param asid The address space for the fetch. + * @param size The number of bytes to request. + * @return Pointer to the new MSHR. + */ + MSHR* allocateTargetList(Addr addr, int asid, int size); + + /** + * Removes the given MSHR from the queue. This places the MSHR on the + * free list. + * @param mshr + */ + void deallocate(MSHR* mshr); + + /** + * Allocates a target to the given MSHR. Used to keep track of the number + * of outstanding targets. + * @param mshr The MSHR to allocate the target to. + * @param req The target request. + */ + void allocateTarget(MSHR* mshr, Packet * &pkt) + { + mshr->allocateTarget(pkt); + allocatedTargets += 1; + } + + /** + * Remove a MSHR from the queue. Returns an iterator into the allocatedList + * for faster squash implementation. + * @param mshr The MSHR to remove. + * @return An iterator to the next entry in the allocatedList. + */ + MSHR::Iterator deallocateOne(MSHR* mshr); + + /** + * Moves the MSHR to the front of the pending list if it is not in service. + * @param mshr The mshr to move. + */ + void moveToFront(MSHR *mshr); + + /** + * Mark the given MSHR as in service. This removes the MSHR from the + * pendingList. Deallocates the MSHR if it does not expect a response. + * @param mshr The MSHR to mark in service. + */ + void markInService(MSHR* mshr); + + /** + * Mark an in service mshr as pending, used to resend a request. + * @param mshr The MSHR to resend. + * @param cmd The command to resend. + */ + void markPending(MSHR* mshr, Packet::Command cmd); + + /** + * Squash outstanding requests with the given thread number. If a request + * is in service, just squashes the targets. + * @param req->getThreadNum()ber The thread to squash. + */ + void squash(int threadNum); + + /** + * Returns true if the pending list is not empty. + * @return True if there are outstanding requests. + */ + bool havePending() const + { + return !pendingList.empty(); + } + + /** + * Returns true if there are no free MSHRs. + * @return True if this queue is full. + */ + bool isFull() const + { + return (allocated > numMSHRs - numReserve); + } + + /** + * Returns the request at the head of the pendingList. + * @return The next request to service. + */ + Packet * getReq() const + { + if (pendingList.empty()) { + return NULL; + } + MSHR* mshr = pendingList.front(); + return mshr->pkt; + } + + /** + * Returns the number of outstanding targets. + * @return the number of allocated targets. + */ + int getAllocatedTargets() const + { + return allocatedTargets; + } + +}; + +#endif //__MSHR_QUEUE_HH__ diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc new file mode 100644 index 000000000..897551989 --- /dev/null +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Hardware Prefetcher Definition. + */ + +#include "base/trace.hh" +#include "mem/cache/base_cache.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" +#include "mem/request.hh" +#include <list> + +BasePrefetcher::BasePrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData) + :size(size), pageStop(pageStop), serialSquash(serialSquash), + cacheCheckPush(cacheCheckPush), only_data(onlyData) +{ +} + +void +BasePrefetcher::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +BasePrefetcher::regStats(const std::string &name) +{ + pfIdentified + .name(name + ".prefetcher.num_hwpf_identified") + .desc("number of hwpf identified") + ; + + pfMSHRHit + .name(name + ".prefetcher.num_hwpf_already_in_mshr") + .desc("number of hwpf that were already in mshr") + ; + + pfCacheHit + .name(name + ".prefetcher.num_hwpf_already_in_cache") + .desc("number of hwpf that were already in the cache") + ; + + pfBufferHit + .name(name + ".prefetcher.num_hwpf_already_in_prefetcher") + .desc("number of hwpf that were already in the prefetch queue") + ; + + pfRemovedFull + .name(name + ".prefetcher.num_hwpf_evicted") + .desc("number of hwpf removed due to no buffer left") + ; + + pfRemovedMSHR + .name(name + ".prefetcher.num_hwpf_removed_MSHR_hit") + .desc("number of hwpf removed because MSHR allocated") + ; + + pfIssued + .name(name + ".prefetcher.num_hwpf_issued") + .desc("number of hwpf issued") + ; + + pfSpanPage + .name(name + ".prefetcher.num_hwpf_span_page") + .desc("number of hwpf spanning a virtual page") + ; + + pfSquashed + .name(name + ".prefetcher.num_hwpf_squashed_from_miss") + .desc("number of hwpf that got squashed due to a miss aborting calculation time") + ; +} + +Packet * +BasePrefetcher::getPacket() +{ + DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name()); + + if (pf.empty()) { + DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name()); + return NULL; + } + + Packet * pkt; + bool keepTrying = false; + do { + pkt = *pf.begin(); + pf.pop_front(); + if (!cacheCheckPush) { + keepTrying = inCache(pkt); + } + if (pf.empty()) { + cache->clearMasterRequest(Request_PF); + if (keepTrying) return NULL; //None left, all were in cache + } + } while (keepTrying); + + pfIssued++; + return pkt; +} + +void +BasePrefetcher::handleMiss(Packet * &pkt, Tick time) +{ + if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data)) + { + //Calculate the blk address + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); + + //Check if miss is in pfq, if so remove it + std::list<Packet *>::iterator iter = inPrefetch(blkAddr); + if (iter != pf.end()) { + DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name()); + pfRemovedMSHR++; + pf.erase(iter); + if (pf.empty()) + cache->clearMasterRequest(Request_PF); + } + + //Remove anything in queue with delay older than time + //since everything is inserted in time order, start from end + //and work until pf.empty() or time is earlier + //This is done to emulate Aborting the previous work on a new miss + //Needed for serial calculators like GHB + if (serialSquash) { + iter = pf.end(); + iter--; + while (!pf.empty() && ((*iter)->time >= time)) { + pfSquashed++; + pf.pop_back(); + iter--; + } + if (pf.empty()) + cache->clearMasterRequest(Request_PF); + } + + + std::list<Addr> addresses; + std::list<Tick> delays; + calculatePrefetch(pkt, addresses, delays); + + std::list<Addr>::iterator addr = addresses.begin(); + std::list<Tick>::iterator delay = delays.begin(); + while (addr != addresses.end()) + { + DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name()); + //temp calc this here... + pfIdentified++; + //create a prefetch memreq + Request * prefetchReq = new Request(*addr, blkSize, 0); + Packet * prefetch; + prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1); + prefetch->allocate(); + prefetch->req->setThreadContext(pkt->req->getCpuNum(), + pkt->req->getThreadNum()); + + prefetch->time = time + (*delay); //@todo ADD LATENCY HERE + //... initialize + + //Check if it is already in the cache + if (cacheCheckPush) { + if (inCache(prefetch)) { + addr++; + delay++; + continue; + } + } + + //Check if it is already in the miss_queue + if (inMissQueue(prefetch->getAddr(), prefetch->req->getAsid())) { + addr++; + delay++; + continue; + } + + //Check if it is already in the pf buffer + if (inPrefetch(prefetch->getAddr()) != pf.end()) { + pfBufferHit++; + addr++; + delay++; + continue; + } + + //We just remove the head if we are full + if (pf.size() == size) + { + DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name()); + pfRemovedFull++; + pf.pop_front(); + } + + pf.push_back(prefetch); + prefetch->flags |= CACHE_LINE_FILL; + + //Make sure to request the bus, with proper delay + cache->setMasterRequest(Request_PF, prefetch->time); + + //Increment through the list + addr++; + delay++; + } + } +} + +std::list<Packet *>::iterator +BasePrefetcher::inPrefetch(Addr address) +{ + //Guaranteed to only be one match, we always check before inserting + std::list<Packet *>::iterator iter; + for (iter=pf.begin(); iter != pf.end(); iter++) { + if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) { + return iter; + } + } + return pf.end(); +} + + diff --git a/src/mem/cache/prefetch/base_prefetcher.hh b/src/mem/cache/prefetch/base_prefetcher.hh new file mode 100644 index 000000000..3e4fc89d1 --- /dev/null +++ b/src/mem/cache/prefetch/base_prefetcher.hh @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Miss and writeback queue declarations. + */ + +#ifndef __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__ + +#include "mem/packet.hh" +#include <list> + +class BaseCache; +class BasePrefetcher +{ + protected: + + /** The Prefetch Queue. */ + std::list<Packet *> pf; + + // PARAMETERS + + /** The number of MSHRs in the Prefetch Queue. */ + const int size; + + /** Pointr to the parent cache. */ + BaseCache* cache; + + /** The block size of the parent cache. */ + int blkSize; + + /** Do we prefetch across page boundaries. */ + bool pageStop; + + /** Do we remove prefetches with later times than a new miss.*/ + bool serialSquash; + + /** Do we check if it is in the cache when inserting into buffer, + or removing.*/ + bool cacheCheckPush; + + /** Do we prefetch on only data reads, or on inst reads as well. */ + bool only_data; + + public: + + Stats::Scalar<> pfIdentified; + Stats::Scalar<> pfMSHRHit; + Stats::Scalar<> pfCacheHit; + Stats::Scalar<> pfBufferHit; + Stats::Scalar<> pfRemovedFull; + Stats::Scalar<> pfRemovedMSHR; + Stats::Scalar<> pfIssued; + Stats::Scalar<> pfSpanPage; + Stats::Scalar<> pfSquashed; + + void regStats(const std::string &name); + + public: + BasePrefetcher(int numMSHRS, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData); + + virtual ~BasePrefetcher() {} + + void setCache(BaseCache *_cache); + + void handleMiss(Packet * &pkt, Tick time); + + Packet * getPacket(); + + bool havePending() + { + return !pf.empty(); + } + + virtual void calculatePrefetch(Packet * &pkt, + std::list<Addr> &addresses, + std::list<Tick> &delays) = 0; + + virtual bool inCache(Packet * &pkt) = 0; + + virtual bool inMissQueue(Addr address, int asid) = 0; + + std::list<Packet *>::iterator inPrefetch(Addr address); +}; + + +#endif //__MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/ghb_prefetcher.cc b/src/mem/cache/prefetch/ghb_prefetcher.cc new file mode 100644 index 000000000..247ec6e8b --- /dev/null +++ b/src/mem/cache/prefetch/ghb_prefetcher.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * GHB Prefetcher template instantiations. + */ + +#include "mem/cache/tags/cache_tags.hh" + +#include "mem/cache/tags/lru.hh" + +#include "base/compression/null_compression.hh" + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/prefetch/ghb_prefetcher.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template class GHBPrefetcher<CacheTags<LRU,NullCompression>, MissQueue>; +template class GHBPrefetcher<CacheTags<LRU,NullCompression>, BlockingBuffer>; + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh new file mode 100644 index 000000000..c22b763d1 --- /dev/null +++ b/src/mem/cache/prefetch/ghb_prefetcher.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a ghb prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ + +#include "base/misc.hh" // fatal, panic, and warn + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template <class TagStore, class Buffering> +class GHBPrefetcher : public Prefetcher<TagStore, Buffering> +{ + protected: + + Buffering* mq; + TagStore* tags; + + Addr second_last_miss_addr[64/*MAX_CPUS*/]; + Addr last_miss_addr[64/*MAX_CPUS*/]; + + Tick latency; + int degree; + bool useCPUId; + + public: + + GHBPrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree, bool useCPUId) + :Prefetcher<TagStore, Buffering>(size, pageStop, serialSquash, + cacheCheckPush, onlyData), + latency(latency), degree(degree), useCPUId(useCPUId) + { + } + + ~GHBPrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses, + std::list<Tick> &delays) + { + Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); + int cpuID = pkt->req->getCpuNum(); + if (!useCPUId) cpuID = 0; + + + int new_stride = blkAddr - last_miss_addr[cpuID]; + int old_stride = last_miss_addr[cpuID] - + second_last_miss_addr[cpuID]; + + second_last_miss_addr[cpuID] = last_miss_addr[cpuID]; + last_miss_addr[cpuID] = blkAddr; + + if (new_stride == old_stride) { + for (int d=1; d <= degree; d++) { + Addr newAddr = blkAddr + d * new_stride; + if (this->pageStop && + (blkAddr & ~(TheISA::VMPageSize - 1)) != + (newAddr & ~(TheISA::VMPageSize - 1))) + { + //Spanned the page, so now stop + this->pfSpanPage += degree - d + 1; + return; + } + else + { + addresses.push_back(newAddr); + delays.push_back(latency); + } + } + } + } +}; + +#endif // __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/stride_prefetcher.cc b/src/mem/cache/prefetch/stride_prefetcher.cc new file mode 100644 index 000000000..93a096468 --- /dev/null +++ b/src/mem/cache/prefetch/stride_prefetcher.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * Stride Prefetcher template instantiations. + */ + +#include "mem/cache/tags/cache_tags.hh" + +#include "mem/cache/tags/lru.hh" + +#include "base/compression/null_compression.hh" + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/prefetch/stride_prefetcher.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template class StridePrefetcher<CacheTags<LRU,NullCompression>, MissQueue>; +template class StridePrefetcher<CacheTags<LRU,NullCompression>, BlockingBuffer>; + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh new file mode 100644 index 000000000..4a8ee7de4 --- /dev/null +++ b/src/mem/cache/prefetch/stride_prefetcher.hh @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a strided prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ + +#include "base/misc.hh" // fatal, panic, and warn + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template <class TagStore, class Buffering> +class StridePrefetcher : public Prefetcher<TagStore, Buffering> +{ + protected: + + Buffering* mq; + TagStore* tags; + + class strideEntry + { + public: + Addr IAddr; + Addr MAddr; + int stride; + int64_t confidence; + +/* bool operator < (strideEntry a,strideEntry b) + { + if (a.confidence == b.confidence) { + return true; //?????? + } + else return a.confidence < b.confidence; + }*/ + }; + Addr* lastMissAddr[64/*MAX_CPUS*/]; + + std::list<strideEntry*> table[64/*MAX_CPUS*/]; + Tick latency; + int degree; + bool useCPUId; + + + public: + + StridePrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree, bool useCPUId) + :Prefetcher<TagStore, Buffering>(size, pageStop, serialSquash, + cacheCheckPush, onlyData), + latency(latency), degree(degree), useCPUId(useCPUId) + { + } + + ~StridePrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses, + std::list<Tick> &delays) + { +// Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); + int cpuID = pkt->req->getCpuNum(); + if (!useCPUId) cpuID = 0; + + /* Scan Table for IAddr Match */ +/* std::list<strideEntry*>::iterator iter; + for (iter=table[cpuID].begin(); + iter !=table[cpuID].end(); + iter++) { + if ((*iter)->IAddr == pkt->pc) break; + } + + if (iter != table[cpuID].end()) { + //Hit in table + + int newStride = blkAddr - (*iter)->MAddr; + if (newStride == (*iter)->stride) { + (*iter)->confidence++; + } + else { + (*iter)->stride = newStride; + (*iter)->confidence--; + } + + (*iter)->MAddr = blkAddr; + + for (int d=1; d <= degree; d++) { + Addr newAddr = blkAddr + d * newStride; + if (this->pageStop && + (blkAddr & ~(TheISA::VMPageSize - 1)) != + (newAddr & ~(TheISA::VMPageSize - 1))) + { + //Spanned the page, so now stop + this->pfSpanPage += degree - d + 1; + return; + } + else + { + addresses.push_back(newAddr); + delays.push_back(latency); + } + } + } + else { + //Miss in table + //Find lowest confidence and replace + + } +*/ } +}; + +#endif // __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged_prefetcher.hh b/src/mem/cache/prefetch/tagged_prefetcher.hh new file mode 100644 index 000000000..17f500dd8 --- /dev/null +++ b/src/mem/cache/prefetch/tagged_prefetcher.hh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a tagged prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template <class TagStore, class Buffering> +class TaggedPrefetcher : public Prefetcher<TagStore, Buffering> +{ + protected: + + Buffering* mq; + TagStore* tags; + + Tick latency; + int degree; + + public: + + TaggedPrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree); + + ~TaggedPrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses, + std::list<Tick> &delays); +}; + +#endif // __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh index 7bdabbe14..db5c94820 100644 --- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh +++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh @@ -49,10 +49,10 @@ TaggedPrefetcher(int size, bool pageStop, bool serialSquash, template <class TagStore, class Buffering> void TaggedPrefetcher<TagStore, Buffering>:: -calculatePrefetch(MemReqPtr &req, std::list<Addr> &addresses, +calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses, std::list<Tick> &delays) { - Addr blkAddr = req->paddr & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); for (int d=1; d <= degree; d++) { Addr newAddr = blkAddr + d*(this->blkSize); diff --git a/src/mem/cache/tags/base_tags.cc b/src/mem/cache/tags/base_tags.cc new file mode 100644 index 000000000..153737300 --- /dev/null +++ b/src/mem/cache/tags/base_tags.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Definitions of BaseTags. + */ + +#include "mem/cache/tags/base_tags.hh" + +#include "mem/cache/base_cache.hh" +#include "cpu/smt.hh" //maxThreadsPerCPU +#include "sim/sim_exit.hh" + +using namespace std; + +void +BaseTags::setCache(BaseCache *_cache) +{ + cache = _cache; + objName = cache->name(); +} + +void +BaseTags::regStats(const string &name) +{ + using namespace Stats; + replacements + .init(maxThreadsPerCPU) + .name(name + ".replacements") + .desc("number of replacements") + .flags(total) + ; + + tagsInUse + .name(name + ".tagsinuse") + .desc("Cycle average of tags in use") + ; + + totalRefs + .name(name + ".total_refs") + .desc("Total number of references to valid blocks.") + ; + + sampledRefs + .name(name + ".sampled_refs") + .desc("Sample count of references to valid blocks.") + ; + + avgRefs + .name(name + ".avg_refs") + .desc("Average number of references to valid blocks.") + ; + + avgRefs = totalRefs/sampledRefs; + + warmupCycle + .name(name + ".warmup_cycle") + .desc("Cycle when the warmup percentage was hit.") + ; + + registerExitCallback(new BaseTagsCallback(this)); +} diff --git a/src/mem/cache/tags/base_tags.hh b/src/mem/cache/tags/base_tags.hh new file mode 100644 index 000000000..b7b0c7ef0 --- /dev/null +++ b/src/mem/cache/tags/base_tags.hh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Declaration of a common base class for cache tagstore objects. + */ + +#ifndef __BASE_TAGS_HH__ +#define __BASE_TAGS_HH__ + +#include <string> +#include "base/statistics.hh" +#include "base/callback.hh" + +class BaseCache; + +/** + * A common base class of Cache tagstore objects. + */ +class BaseTags +{ + protected: + /** Pointer to the parent cache. */ + BaseCache *cache; + + /** Local copy of the parent cache name. Used for DPRINTF. */ + std::string objName; + + /** + * The number of tags that need to be touched to meet the warmup + * percentage. + */ + int warmupBound; + /** Marked true when the cache is warmed up. */ + bool warmedUp; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + + /** Number of replacements of valid blocks per thread. */ + Stats::Vector<> replacements; + /** Per cycle average of the number of tags that hold valid data. */ + Stats::Average<> tagsInUse; + + /** The total number of references to a block before it is replaced. */ + Stats::Scalar<> totalRefs; + + /** + * The number of reference counts sampled. This is different from + * replacements because we sample all the valid blocks when the simulator + * exits. + */ + Stats::Scalar<> sampledRefs; + + /** + * Average number of references to a block before is was replaced. + * @todo This should change to an average stat once we have them. + */ + Stats::Formula avgRefs; + + /** The cycle that the warmup percentage was hit. */ + Stats::Scalar<> warmupCycle; + /** + * @} + */ + + public: + + /** + * Destructor. + */ + virtual ~BaseTags() {} + + /** + * Set the parent cache back pointer. Also copies the cache name to + * objName. + * @param _cache Pointer to parent cache. + */ + void setCache(BaseCache *_cache); + + /** + * Return the parent cache name. + * @return the parent cache name. + */ + const std::string &name() const + { + return objName; + } + + /** + * Register local statistics. + * @param name The name to preceed each statistic name. + */ + void regStats(const std::string &name); + + /** + * Average in the reference count for valid blocks when the simulation + * exits. + */ + virtual void cleanupRefs() {} +}; + +class BaseTagsCallback : public Callback +{ + BaseTags *tags; + public: + BaseTagsCallback(BaseTags *t) : tags(t) {} + virtual void process() { tags->cleanupRefs(); }; +}; + +#endif //__BASE_TAGS_HH__ diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc new file mode 100644 index 000000000..82d2c410d --- /dev/null +++ b/src/mem/cache/tags/fa_lru.cc @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions a fully associative LRU tagstore. + */ + +#include <sstream> + +#include <assert.h> + +#include "mem/cache/tags/fa_lru.hh" +#include "base/intmath.hh" +#include "base/misc.hh" + +using namespace std; + +FALRU::FALRU(int _blkSize, int _size, int hit_latency) + : blkSize(_blkSize), size(_size), + numBlks(size/blkSize), hitLatency(hit_latency) +{ + if (!isPowerOf2(blkSize)) + fatal("cache block size (in bytes) `%d' must be a power of two", + blkSize); + if (!(hitLatency > 0)) + fatal("Access latency in cycles must be at least one cycle"); + if (!isPowerOf2(size)) + fatal("Cache Size must be power of 2 for now"); + + // Track all cache sizes from 128K up by powers of 2 + numCaches = floorLog2(size) - 17; + if (numCaches >0){ + cacheBoundaries = new FALRUBlk *[numCaches]; + cacheMask = (1 << numCaches) - 1; + } else { + cacheMask = 0; + } + + warmedUp = false; + warmupBound = size/blkSize; + + blks = new FALRUBlk[numBlks]; + head = &(blks[0]); + tail = &(blks[numBlks-1]); + + head->prev = NULL; + head->next = &(blks[1]); + head->inCache = cacheMask; + + tail->prev = &(blks[numBlks-2]); + tail->next = NULL; + tail->inCache = 0; + + int index = (1 << 17) / blkSize; + int j = 0; + int flags = cacheMask; + for (int i = 1; i < numBlks-1; i++) { + blks[i].inCache = flags; + if (i == index - 1){ + cacheBoundaries[j] = &(blks[i]); + flags &= ~ (1<<j); + ++j; + index = index << 1; + } + blks[i].prev = &(blks[i-1]); + blks[i].next = &(blks[i+1]); + blks[i].isTouched = false; + } + assert(j == numCaches); + assert(index == numBlks); + //assert(check()); +} + +void +FALRU::regStats(const string &name) +{ + using namespace Stats; + BaseTags::regStats(name); + hits + .init(numCaches+1) + .name(name + ".falru_hits") + .desc("The number of hits in each cache size.") + ; + misses + .init(numCaches+1) + .name(name + ".falru_misses") + .desc("The number of misses in each cache size.") + ; + accesses + .name(name + ".falru_accesses") + .desc("The number of accesses to the FA LRU cache.") + ; + + for (int i = 0; i < numCaches+1; ++i) { + stringstream size_str; + if (i < 3){ + size_str << (1<<(i+7)) <<"K"; + } else { + size_str << (1<<(i-3)) <<"M"; + } + + hits.subname(i, size_str.str()); + hits.subdesc(i, "Hits in a " + size_str.str() +" cache"); + misses.subname(i, size_str.str()); + misses.subdesc(i, "Misses in a " + size_str.str() +" cache"); + } +} + +FALRUBlk * +FALRU::hashLookup(Addr addr) const +{ + tagIterator iter = tagHash.find(addr); + if (iter != tagHash.end()) { + return (*iter).second; + } + return NULL; +} + +bool +FALRU::probe(int asid, Addr addr) const +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + return blk && blk->tag == blkAddr && blk->isValid(); +} + +void +FALRU::invalidateBlk(int asid, Addr addr) +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = (*tagHash.find(blkAddr)).second; + if (blk) { + assert(blk->tag == blkAddr); + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +FALRUBlk* +FALRU::findBlock(Addr addr, int asid, int &lat, int *inCache) +{ + accesses++; + int tmp_in_cache = 0; + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + tmp_in_cache = blk->inCache; + for (int i = 0; i < numCaches; i++) { + if (1<<i & blk->inCache) { + hits[i]++; + } else { + misses[i]++; + } + } + hits[numCaches]++; + if (blk != head){ + moveToHead(blk); + } + } else { + blk = NULL; + for (int i = 0; i < numCaches+1; ++i) { + misses[i]++; + } + } + if (inCache) { + *inCache = tmp_in_cache; + } + + lat = hitLatency; + //assert(check()); + return blk; +} + +FALRUBlk* +FALRU::findBlock(Packet * &pkt, int &lat, int *inCache) +{ + Addr addr = pkt->getAddr(); + + accesses++; + int tmp_in_cache = 0; + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + tmp_in_cache = blk->inCache; + for (int i = 0; i < numCaches; i++) { + if (1<<i & blk->inCache) { + hits[i]++; + } else { + misses[i]++; + } + } + hits[numCaches]++; + if (blk != head){ + moveToHead(blk); + } + } else { + blk = NULL; + for (int i = 0; i < numCaches+1; ++i) { + misses[i]++; + } + } + if (inCache) { + *inCache = tmp_in_cache; + } + + lat = hitLatency; + //assert(check()); + return blk; +} + +FALRUBlk* +FALRU::findBlock(Addr addr, int asid) const +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + } else { + blk = NULL; + } + return blk; +} + +FALRUBlk* +FALRU::findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks) +{ + FALRUBlk * blk = tail; + assert(blk->inCache == 0); + moveToHead(blk); + tagHash.erase(blk->tag); + tagHash[blkAlign(pkt->getAddr())] = blk; + if (blk->isValid()) { + replacements[0]++; + } else { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + //assert(check()); + return blk; +} + +void +FALRU::moveToHead(FALRUBlk *blk) +{ + int updateMask = blk->inCache ^ cacheMask; + for (int i = 0; i < numCaches; i++){ + if ((1<<i) & updateMask) { + cacheBoundaries[i]->inCache &= ~(1<<i); + cacheBoundaries[i] = cacheBoundaries[i]->prev; + } else if (cacheBoundaries[i] == blk) { + cacheBoundaries[i] = blk->prev; + } + } + blk->inCache = cacheMask; + if (blk != head) { + if (blk == tail){ + assert(blk->next == NULL); + tail = blk->prev; + tail->next = NULL; + } else { + blk->prev->next = blk->next; + blk->next->prev = blk->prev; + } + blk->next = head; + blk->prev = NULL; + head->prev = blk; + head = blk; + } +} + +bool +FALRU::check() +{ + FALRUBlk* blk = head; + int size = 0; + int boundary = 1<<17; + int j = 0; + int flags = cacheMask; + while (blk) { + size += blkSize; + if (blk->inCache != flags) { + return false; + } + if (size == boundary && blk != tail) { + if (cacheBoundaries[j] != blk) { + return false; + } + flags &=~(1 << j); + boundary = boundary<<1; + ++j; + } + blk = blk->next; + } + return true; +} diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh new file mode 100644 index 000000000..566e36c27 --- /dev/null +++ b/src/mem/cache/tags/fa_lru.hh @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a fully associative LRU tag store. + */ + +#ifndef __FA_LRU_HH__ +#define __FA_LRU_HH__ + +#include <list> + +#include "mem/cache/cache_blk.hh" +#include "mem/packet.hh" +#include "base/hashmap.hh" +#include "mem/cache/tags/base_tags.hh" + +/** + * A fully associative cache block. + */ +class FALRUBlk : public CacheBlk +{ +public: + /** The previous block in LRU order. */ + FALRUBlk *prev; + /** The next block in LRU order. */ + FALRUBlk *next; + /** Has this block been touched? */ + bool isTouched; + + /** + * A bit mask of the sizes of cache that this block is resident in. + * Each bit represents a power of 2 in MB size cache. + * If bit 0 is set, this block is in a 1MB cache + * If bit 2 is set, this block is in a 4MB cache, etc. + * There is one bit for each cache smaller than the full size (default + * 16MB). + */ + int inCache; +}; + +/** + * A fully associative LRU cache. Keeps statistics for accesses to a number of + * cache sizes at once. + */ +class FALRU : public BaseTags +{ + public: + /** Typedef the block type used in this class. */ + typedef FALRUBlk BlkType; + /** Typedef a list of pointers to the local block type. */ + typedef std::list<FALRUBlk*> BlkList; + protected: + /** The block size of the cache. */ + const int blkSize; + /** The size of the cache. */ + const int size; + /** The number of blocks in the cache. */ + const int numBlks; // calculated internally + /** The hit latency of the cache. */ + const int hitLatency; + + /** Array of pointers to blocks at the cache size boundaries. */ + FALRUBlk **cacheBoundaries; + /** A mask for the FALRUBlk::inCache bits. */ + int cacheMask; + /** The number of different size caches being tracked. */ + int numCaches; + + /** The cache blocks. */ + FALRUBlk *blks; + + /** The MRU block. */ + FALRUBlk *head; + /** The LRU block. */ + FALRUBlk *tail; + + /** Hash table type mapping addresses to cache block pointers. */ + typedef m5::hash_map<Addr, FALRUBlk *, m5::hash<Addr> > hash_t; + /** Iterator into the address hash table. */ + typedef hash_t::const_iterator tagIterator; + + /** The address hash table. */ + hash_t tagHash; + + /** + * Find the cache block for the given address. + * @param addr The address to find. + * @return The cache block of the address, if any. + */ + FALRUBlk * hashLookup(Addr addr) const; + + /** + * Move a cache block to the MRU position. + * @param blk The block to promote. + */ + void moveToHead(FALRUBlk *blk); + + /** + * Check to make sure all the cache boundaries are still where they should + * be. Used for debugging. + * @return True if everything is correct. + */ + bool check(); + + /** + * @defgroup FALRUStats Fully Associative LRU specific statistics + * The FA lru stack lets us track multiple cache sizes at once. These + * statistics track the hits and misses for different cache sizes. + * @{ + */ + + /** Hits in each cache size >= 128K. */ + Stats::Vector<> hits; + /** Misses in each cache size >= 128K. */ + Stats::Vector<> misses; + /** Total number of accesses. */ + Stats::Scalar<> accesses; + + /** + * @} + */ + +public: + /** + * Construct and initialize this cache tagstore. + * @param blkSize The block size of the cache. + * @param size The size of the cache. + * @param hit_latency The hit latency of the cache. + */ + FALRU(int blkSize, int size, int hit_latency); + + /** + * Register the stats for this object. + * @param name The name to prepend to the stats name. + */ + void regStats(const std::string &name); + + /** + * Return true if the address is found in the cache. + * @param asid The address space ID. + * @param addr The address to look for. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the cache block that contains the given addr. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Find the block in the cache and update the replacement data. Returns + * the access latency and the in cache flags as a side effect + * @param addr The address to look for. + * @param asid The address space ID. + * @param lat The latency of the access. + * @param inCache The FALRUBlk::inCache flags. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Addr addr, int asid, int &lat, int *inCache = 0); + + /** + * Find the block in the cache and update the replacement data. Returns + * the access latency and the in cache flags as a side effect + * @param req The req whose block to find + * @param lat The latency of the access. + * @param inCache The FALRUBlk::inCache flags. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Packet * &pkt, int &lat, int *inCache = 0); + + /** + * Find the block in the cache, do not update the replacement data. + * @param addr The address to look for. + * @param asid The address space ID. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + FALRUBlk* findReplacement(Packet * &pkt, PacketList & writebacks, + BlkList &compress_blocks); + + /** + * Return the hit latency of this cache. + * @return The hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Return the block size of this cache. + * @return The block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size of this cache, always the block size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The aligned address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)(blkSize-1)); + } + + /** + * Generate the tag from the addres. For fully associative this is just the + * block address. + * @param addr The address to get the tag from. + * @param blk ignored here + * @return The tag. + */ + Addr extractTag(Addr addr, FALRUBlk *blk) const + { + return blkAlign(addr); + } + + /** + * Return the set of an address. Only one set in a fully associative cache. + * @param addr The address to get the set from. + * @return 0. + */ + int extractSet(Addr addr) const + { + return 0; + } + + /** + * Calculate the block offset of an address. + * @param addr the address to get the offset of. + * @return the block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & (Addr)(blkSize-1)); + } + + /** + * Regenerate the block address from the tag and the set. + * @param tag The tag of the block. + * @param set The set the block belongs to. + * @return the block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) const + { + return (tag); + } + + /** + * Read the data out of the internal storage of a cache block. FALRU + * currently doesn't support data storage. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The data from the cache block. + */ + void readData(FALRUBlk *blk, uint8_t *data) + { + } + + /** + * Write data into the internal storage of a cache block. FALRU + * currently doesn't support data storage. + * @param blk The cache block to be written. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(FALRUBlk *blk, uint8_t *data, int size, + PacketList &writebacks) + { + } + + /** + * Unimplemented. Perform a cache block copy from block aligned addresses. + * @param source The block aligned source address. + * @param dest The block aligned destination adddress. + * @param asid The address space ID. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) + { + } + + /** + * Unimplemented. + */ + void fixCopy(Packet * &pkt, PacketList &writebacks) + { + } + +}; + +#endif diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc new file mode 100644 index 000000000..847fabc88 --- /dev/null +++ b/src/mem/cache/tags/iic.cc @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of the Indirect Index Cache tagstore. + */ + +#include <algorithm> +#include <string> +#include <vector> + +#include <math.h> + +#include "mem/cache/base_cache.hh" +#include "mem/cache/tags/iic.hh" +#include "base/intmath.hh" +#include "sim/root.hh" // for curTick + +#include "base/trace.hh" // for DPRINTF + + +using namespace std; + +/** Track the number of accesses to each cache set. */ +#define PROFILE_IIC 1 + +IIC::IIC(IIC::Params ¶ms) : + hashSets(params.numSets), blkSize(params.blkSize), assoc(params.assoc), + hitLatency(params.hitLatency), subSize(params.subblockSize), + numSub(blkSize/subSize), + trivialSize((floorLog2(params.size/subSize)*numSub)/8), + tagShift(floorLog2(blkSize)), blkMask(blkSize - 1), + subShift(floorLog2(subSize)), subMask(numSub - 1), + hashDelay(params.hashDelay), + numBlocks(params.size/subSize), + numTags(hashSets * assoc + params.size/blkSize -1), + numSecondary(params.size/blkSize), + tagNull(numTags), + primaryBound(hashSets * assoc) +{ + int i; + + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (hashSets <= 0 || !isPowerOf2(hashSets)) { + fatal("# of hashsets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + if (numSub*subSize != blkSize) { + fatal("blocksize must be evenly divisible by subblock size"); + } + + // debug stuff + freeSecond = numSecondary; + + warmedUp = false; + warmupBound = params.size/blkSize; + + // Replacement Policy Initialization + repl = params.rp; + repl->setIIC(this); + + //last_miss_time = 0 + + // allocate data reference counters + dataReferenceCount = new int[numBlocks]; + memset(dataReferenceCount, 0, numBlocks*sizeof(int)); + + // Allocate storage for both internal data and block fast access data. + // We allocate it as one large chunk to reduce overhead and to make + // deletion easier. + int data_index = 0; + dataStore = new uint8_t[(numBlocks + numTags) * blkSize]; + dataBlks = new uint8_t*[numBlocks]; + for (i = 0; i < numBlocks; ++i) { + dataBlks[i] = &dataStore[data_index]; + freeDataBlock(i); + data_index += subSize; + } + + assert(data_index == numBlocks * subSize); + + // allocate and init tag store + tagStore = new IICTag[numTags]; + + int blkIndex = 0; + // allocate and init sets + sets = new IICSet[hashSets]; + for (i = 0; i < hashSets; ++i) { + sets[i].assoc = assoc; + sets[i].tags = new IICTag*[assoc]; + sets[i].chain_ptr = tagNull; + + for (int j = 0; j < assoc; ++j) { + IICTag *tag = &tagStore[blkIndex++]; + tag->chain_ptr = tagNull; + tag->data_ptr.resize(numSub); + tag->size = blkSize; + tag->trivialData = new uint8_t[trivialSize]; + tag->numData = 0; + sets[i].tags[j] = tag; + tag->set = i; + tag->data = &dataStore[data_index]; + data_index += blkSize; + } + } + + assert(blkIndex == primaryBound); + + for (i = primaryBound; i < tagNull; i++) { + tagStore[i].chain_ptr = i+1; + //setup data ptrs to subblocks + tagStore[i].data_ptr.resize(numSub); + tagStore[i].size = blkSize; + tagStore[i].trivialData = new uint8_t[trivialSize]; + tagStore[i].numData = 0; + tagStore[i].set = 0; + tagStore[i].data = &dataStore[data_index]; + data_index += blkSize; + } + freelist = primaryBound; +} + +IIC::~IIC() +{ + delete [] dataReferenceCount; + delete [] dataStore; + delete [] tagStore; + delete [] sets; +} + +/* register cache stats */ +void +IIC::regStats(const string &name) +{ + using namespace Stats; + + BaseTags::regStats(name); + + hitHashDepth.init(0, 20, 1); + missHashDepth.init(0, 20, 1); + setAccess.init(0, hashSets, 1); + + /** IIC Statistics */ + hitHashDepth + .name(name + ".hit_hash_depth_dist") + .desc("Dist. of Hash lookup depths") + .flags(pdf) + ; + + missHashDepth + .name(name + ".miss_hash_depth_dist") + .desc("Dist. of Hash lookup depths") + .flags(pdf) + ; + + repl->regStats(name); + + if (PROFILE_IIC) + setAccess + .name(name + ".set_access_dist") + .desc("Dist. of Accesses across sets") + .flags(pdf) + ; + + missDepthTotal + .name(name + ".miss_depth_total") + .desc("Total of miss depths") + ; + + hashMiss + .name(name + ".hash_miss") + .desc("Total of misses in hash table") + ; + + hitDepthTotal + .name(name + ".hit_depth_total") + .desc("Total of hit depths") + ; + + hashHit + .name(name + ".hash_hit") + .desc("Total of hites in hash table") + ; +} + +// probe cache for presence of given block. +bool +IIC::probe(int asid, Addr addr) const +{ + return (findBlock(addr,asid) != NULL); +} + +IICTag* +IIC::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = hash(addr); + int set_lat; + + unsigned long chain_ptr; + + if (PROFILE_IIC) + setAccess.sample(set); + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + set_lat = 1; + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + set_lat += secondary_depth; + // set depth for statistics fix this later!!! egh + sets[set].depth = set_lat; + + if (tag_ptr != NULL) { + /* need to move tag into primary table */ + // need to preserve chain: fix this egh + sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr; + tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore); + tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + assert(tag_ptr!=NULL); + } + + } + set_lat = set_lat * hashDelay + hitLatency; + if (tag_ptr != NULL) { + // IIC replacement: if this is not the first element of + // list, reorder + sets[set].moveToHead(tag_ptr); + + hitHashDepth.sample(sets[set].depth); + hashHit++; + hitDepthTotal += sets[set].depth; + tag_ptr->status |= BlkReferenced; + lat = set_lat; + if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) { + lat = tag_ptr->whenReady - curTick; + } + + tag_ptr->refCount += 1; + } + else { + // fall through: cache block not found, not a hit... + missHashDepth.sample(sets[set].depth); + hashMiss++; + missDepthTotal += sets[set].depth; + lat = set_lat; + } + return tag_ptr; +} + +IICTag* +IIC::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); + + Addr tag = extractTag(addr); + unsigned set = hash(addr); + int set_lat; + + unsigned long chain_ptr; + + if (PROFILE_IIC) + setAccess.sample(set); + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + set_lat = 1; + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + set_lat += secondary_depth; + // set depth for statistics fix this later!!! egh + sets[set].depth = set_lat; + + if (tag_ptr != NULL) { + /* need to move tag into primary table */ + // need to preserve chain: fix this egh + sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr; + tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore); + tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + assert(tag_ptr!=NULL); + } + + } + set_lat = set_lat * hashDelay + hitLatency; + if (tag_ptr != NULL) { + // IIC replacement: if this is not the first element of + // list, reorder + sets[set].moveToHead(tag_ptr); + + hitHashDepth.sample(sets[set].depth); + hashHit++; + hitDepthTotal += sets[set].depth; + tag_ptr->status |= BlkReferenced; + lat = set_lat; + if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) { + lat = tag_ptr->whenReady - curTick; + } + + tag_ptr->refCount += 1; + } + else { + // fall through: cache block not found, not a hit... + missHashDepth.sample(sets[set].depth); + hashMiss++; + missDepthTotal += sets[set].depth; + lat = set_lat; + } + return tag_ptr; +} + +IICTag* +IIC::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = hash(addr); + + unsigned long chain_ptr; + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + } + return tag_ptr; +} + + +IICTag* +IIC::findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks) +{ + DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr()); + unsigned set = hash(pkt->getAddr()); + IICTag *tag_ptr; + unsigned long *tmp_data = new unsigned long[numSub]; + + // Get a enough subblocks for a full cache line + for (int i = 0; i < numSub; ++i){ + tmp_data[i] = getFreeDataBlock(writebacks); + assert(dataReferenceCount[tmp_data[i]]==0); + } + + tag_ptr = getFreeTag(set, writebacks); + + tag_ptr->set = set; + for (int i=0; i< numSub; ++i) { + tag_ptr->data_ptr[i] = tmp_data[i]; + dataReferenceCount[tag_ptr->data_ptr[i]]++; + } + tag_ptr->numData = numSub; + assert(tag_ptr - tagStore < primaryBound); // make sure it is in primary + tag_ptr->chain_ptr = tagNull; + sets[set].moveToHead(tag_ptr); + delete [] tmp_data; + + list<unsigned long> tag_indexes; + repl->doAdvance(tag_indexes); + while (!tag_indexes.empty()) { + if (!tagStore[tag_indexes.front()].isCompressed()) { + compress_blocks.push_back(&tagStore[tag_indexes.front()]); + } + tag_indexes.pop_front(); + } + + tag_ptr->re = (void*)repl->add(tag_ptr-tagStore); + + return tag_ptr; +} + +void +IIC::freeReplacementBlock(PacketList & writebacks) +{ + IICTag *tag_ptr; + unsigned long data_ptr; + /* consult replacement policy */ + tag_ptr = &tagStore[repl->getRepl()]; + assert(tag_ptr->isValid()); + + DPRINTF(Cache, "Replacing %x in IIC: %s\n", + regenerateBlkAddr(tag_ptr->tag,0), + tag_ptr->isModified() ? "writeback" : "clean"); + /* write back replaced block data */ + if (tag_ptr && (tag_ptr->isValid())) { + replacements[0]++; + totalRefs += tag_ptr->refCount; + ++sampledRefs; + tag_ptr->refCount = 0; + + if (tag_ptr->isModified()) { +/* Packet * writeback = + buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), + tag_ptr->req->asid, tag_ptr->xc, blkSize, + tag_ptr->data, + tag_ptr->size); +*/ + Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), + blkSize, 0); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->allocate(); + memcpy(writeback->getPtr<uint8_t>(), tag_ptr->data, blkSize); + + writebacks.push_back(writeback); + } + } + + // free the data blocks + for (int i = 0; i < tag_ptr->numData; ++i) { + data_ptr = tag_ptr->data_ptr[i]; + assert(dataReferenceCount[data_ptr]>0); + if (--dataReferenceCount[data_ptr] == 0) { + freeDataBlock(data_ptr); + } + } + freeTag(tag_ptr); +} + +unsigned long +IIC::getFreeDataBlock(PacketList & writebacks) +{ + struct IICTag *tag_ptr; + unsigned long data_ptr; + + tag_ptr = NULL; + /* find data block */ + while (blkFreelist.empty()) { + freeReplacementBlock(writebacks); + } + + data_ptr = blkFreelist.front(); + blkFreelist.pop_front(); + DPRINTF(IICMore,"Found free data at %d\n",data_ptr); + return data_ptr; +} + + + +IICTag* +IIC::getFreeTag(int set, PacketList & writebacks) +{ + unsigned long tag_index; + IICTag *tag_ptr; + // Add new tag + tag_ptr = sets[set].findFree(); + // if no free in primary, and secondary exists + if (!tag_ptr && numSecondary) { + // need to spill a tag into secondary storage + while (freelist == tagNull) { + // get replacements until one is in secondary + freeReplacementBlock(writebacks); + } + + tag_index = freelist; + freelist = tagStore[freelist].chain_ptr; + freeSecond--; + + assert(tag_index != tagNull); + tagSwap(tag_index, sets[set].tags[assoc-1] - tagStore); + tagStore[tag_index].chain_ptr = sets[set].chain_ptr; + sets[set].chain_ptr = tag_index; + + tag_ptr = sets[set].tags[assoc-1]; + } + DPRINTF(IICMore,"Found free tag at %d\n",tag_ptr - tagStore); + tagsInUse++; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + + return tag_ptr; +} + +void +IIC::freeTag(IICTag *tag_ptr) +{ + unsigned long tag_index, tmp_index; + // Fix tag_ptr + if (tag_ptr) { + // we have a tag to clear + DPRINTF(IICMore,"Freeing Tag for %x\n", + regenerateBlkAddr(tag_ptr->tag,0)); + tagsInUse--; + tag_ptr->status = 0; + tag_ptr->numData = 0; + tag_ptr->re = NULL; + tag_index = tag_ptr - tagStore; + if (tag_index >= primaryBound) { + // tag_ptr points to secondary store + assert(tag_index < tagNull); // remove this?? egh + if (tag_ptr->chain_ptr == tagNull) { + // need to fix chain list + unsigned tmp_set = hash(tag_ptr->tag << tagShift); + if (sets[tmp_set].chain_ptr == tag_index) { + sets[tmp_set].chain_ptr = tagNull; + } else { + tmp_index = sets[tmp_set].chain_ptr; + while (tmp_index != tagNull + && tagStore[tmp_index].chain_ptr != tag_index) { + tmp_index = tagStore[tmp_index].chain_ptr; + } + assert(tmp_index != tagNull); + tagStore[tmp_index].chain_ptr = tagNull; + } + tag_ptr->chain_ptr = freelist; + freelist = tag_index; + freeSecond++; + } else { + // copy next chained entry to this tag location + tmp_index = tag_ptr->chain_ptr; + tagSwap(tmp_index, tag_index); + tagStore[tmp_index].chain_ptr = freelist; + freelist = tmp_index; + freeSecond++; + } + } else { + // tag_ptr in primary hash table + assert(tag_index < primaryBound); + tag_ptr->status = 0; + unsigned tmp_set = hash(tag_ptr->tag << tagShift); + if (sets[tmp_set].chain_ptr != tagNull) { // collapse chain + tmp_index = sets[tmp_set].chain_ptr; + tagSwap(tag_index, tmp_index); + tagStore[tmp_index].chain_ptr = freelist; + freelist = tmp_index; + freeSecond++; + sets[tmp_set].chain_ptr = tag_ptr->chain_ptr; + sets[tmp_set].moveToTail(tag_ptr); + } + } + } +} + +void +IIC::freeDataBlock(unsigned long data_ptr) +{ + assert(dataReferenceCount[data_ptr] == 0); + DPRINTF(IICMore, "Freeing data at %d\n", data_ptr); + blkFreelist.push_front(data_ptr); +} + +/** Use a simple modulo hash. */ +#define SIMPLE_HASH 0 + +unsigned +IIC::hash(Addr addr) const { +#if SIMPLE_HASH + return extractTag(addr) % iic_hash_size; +#else + Addr tag, mask, x, y; + tag = extractTag(addr); + mask = hashSets-1; /* assumes iic_hash_size is a power of 2 */ + x = tag & mask; + y = (tag >> (int)(::log(hashSets)/::log(2))) & mask; + assert (x < hashSets && y < hashSets); + return x ^ y; +#endif +} + + +void +IICSet::moveToHead(IICTag *tag) +{ + if (tags[0] == tag) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + IICTag *next = tag; + + do { + assert(i < assoc); + // swap blks[i] and next + IICTag *tmp = tags[i]; + tags[i] = next; + next = tmp; + ++i; + } while (next != tag); +} + +void +IICSet::moveToTail(IICTag *tag) +{ + if (tags[assoc-1] == tag) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = assoc - 1; + IICTag *next = tag; + + do { + assert(i >= 0); + // swap blks[i] and next + IICTag *tmp = tags[i]; + tags[i] = next; + next = tmp; + --i; + } while (next != tag); +} + +void +IIC::tagSwap(unsigned long index1, unsigned long index2) +{ + DPRINTF(IIC,"Swapping tag[%d]=%x for tag[%d]=%x\n",index1, + tagStore[index1].tag<<tagShift, index2, + tagStore[index2].tag<<tagShift); + IICTag tmp_tag; + tmp_tag = tagStore[index1]; + tagStore[index1] = tagStore[index2]; + tagStore[index2] = tmp_tag; + if (tagStore[index1].isValid()) + repl->fixTag(tagStore[index1].re, index2, index1); + if (tagStore[index2].isValid()) + repl->fixTag(tagStore[index2].re, index1, index2); +} + + +IICTag * +IIC::secondaryChain(int asid, Addr tag, unsigned long chain_ptr, + int *_depth) const +{ + int depth = 0; + while (chain_ptr != tagNull) { + DPRINTF(IIC,"Searching secondary at %d for %x\n", chain_ptr, + tag<<tagShift); + if (tagStore[chain_ptr].tag == tag && + tagStore[chain_ptr].asid == asid && + (tagStore[chain_ptr].isValid())) { + *_depth = depth; + return &tagStore[chain_ptr]; + } + depth++; + chain_ptr = tagStore[chain_ptr].chain_ptr; + } + *_depth = depth; + return NULL; +} + +void +IIC::decompressBlock(unsigned long index) +{ + IICTag *tag_ptr = &tagStore[index]; + if (tag_ptr->isCompressed()) { + // decompress the data here. + } +} + +void +IIC::compressBlock(unsigned long index) +{ + IICTag *tag_ptr = &tagStore[index]; + if (!tag_ptr->isCompressed()) { + // Compress the data here. + } +} + +void +IIC::invalidateBlk(int asid, Addr addr) +{ + IICTag* tag_ptr = findBlock(addr, asid); + if (tag_ptr) { + for (int i = 0; i < tag_ptr->numData; ++i) { + dataReferenceCount[tag_ptr->data_ptr[i]]--; + if (dataReferenceCount[tag_ptr->data_ptr[i]] == 0) { + freeDataBlock(tag_ptr->data_ptr[i]); + } + } + repl->removeEntry(tag_ptr->re); + freeTag(tag_ptr); + } +} + +void +IIC::readData(IICTag *blk, uint8_t *data){ +// assert(cache->doData()); + assert(blk->size <= trivialSize || blk->numData > 0); + int data_size = blk->size; + if (data_size > trivialSize) { + for (int i = 0; i < blk->numData; ++i){ + memcpy(data+i*subSize, + &(dataBlks[blk->data_ptr[i]][0]), + (data_size>subSize)?subSize:data_size); + data_size -= subSize; + } + } else { + memcpy(data,blk->trivialData,data_size); + } +} + +void +IIC::writeData(IICTag *blk, uint8_t *write_data, int size, + PacketList & writebacks){ +// assert(cache->doData()); + assert(size < blkSize || !blk->isCompressed()); + DPRINTF(IIC, "Writing %d bytes to %x\n", size, + blk->tag<<tagShift); + // Find the number of subblocks needed, (round up) + int num_subs = (size + (subSize -1))/subSize; + if (size <= trivialSize) { + num_subs = 0; + } + assert(num_subs <= numSub); + if (num_subs > blk->numData) { + // need to allocate more data blocks + for (int i = blk->numData; i < num_subs; ++i){ + blk->data_ptr[i] = getFreeDataBlock(writebacks); + dataReferenceCount[blk->data_ptr[i]] += 1; + } + } else if (num_subs < blk->numData){ + // can free data blocks + for (int i=num_subs; i < blk->numData; ++i){ + // decrement reference count and compare to zero + /** + * @todo + * Make this work with copying. + */ + if (--dataReferenceCount[blk->data_ptr[i]] == 0) { + freeDataBlock(blk->data_ptr[i]); + } + } + } + + blk->numData = num_subs; + blk->size = size; + assert(size <= trivialSize || blk->numData > 0); + if (size > trivialSize){ + for (int i = 0; i < blk->numData; ++i){ + memcpy(&dataBlks[blk->data_ptr[i]][0], write_data + i*subSize, + (size>subSize)?subSize:size); + size -= subSize; + } + } else { + memcpy(blk->trivialData,write_data,size); + } +} + + +/** + * @todo This code can break if the src is evicted to get a tag for the dest. + */ +void +IIC::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) +{ +//Copy unsuported now +#if 0 + IICTag *dest_tag = findBlock(dest, asid); + + if (dest_tag) { + for (int i = 0; i < dest_tag->numData; ++i) { + if (--dataReferenceCount[dest_tag->data_ptr[i]] == 0) { + freeDataBlock(dest_tag->data_ptr[i]); + } + } + // Reset replacement entry + } else { + dest_tag = getFreeTag(hash(dest), writebacks); + dest_tag->re = (void*) repl->add(dest_tag - tagStore); + dest_tag->set = hash(dest); + dest_tag->tag = extractTag(dest); + dest_tag->asid = asid; + dest_tag->status = BlkValid | BlkWritable; + } + // Find the source tag here since it might move if we need to find a + // tag for the destination. + IICTag *src_tag = findBlock(source, asid); + assert(src_tag); + assert(!cache->doData() || src_tag->size <= trivialSize + || src_tag->numData > 0); + // point dest to source data and inc counter + for (int i = 0; i < src_tag->numData; ++i) { + dest_tag->data_ptr[i] = src_tag->data_ptr[i]; + ++dataReferenceCount[dest_tag->data_ptr[i]]; + } + + // Maintain fast access data. + memcpy(dest_tag->data, src_tag->data, blkSize); + + dest_tag->xc = src_tag->xc; + dest_tag->size = src_tag->size; + dest_tag->numData = src_tag->numData; + if (src_tag->numData == 0) { + // Data is stored in the trivial data, just copy it. + memcpy(dest_tag->trivialData, src_tag->trivialData, src_tag->size); + } + + dest_tag->status |= BlkDirty; + if (dest_tag->size < blkSize) { + dest_tag->status |= BlkCompressed; + } else { + dest_tag->status &= ~BlkCompressed; + } +#endif +} + +void +IIC::fixCopy(Packet * &pkt, PacketList &writebacks) +{ +#if 0 + // if reference counter is greater than 1, do copy + // else do write + Addr blk_addr = blkAlign(pkt->getAddr); + IICTag* blk = findBlock(blk_addr, pkt->req->getAsid()); + + if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) { + // copy the data + // Mark the block as referenced so it doesn't get replaced. + blk->status |= BlkReferenced; + for (int i = 0; i < blk->numData; ++i){ + unsigned long new_data = getFreeDataBlock(writebacks); + // Need to refresh pointer + /** + * @todo Remove this refetch once we change IIC to pointer based + */ + blk = findBlock(blk_addr, pkt->req->getAsid()); + assert(blk); + if (cache->doData()) { + memcpy(&(dataBlks[new_data][0]), + &(dataBlks[blk->data_ptr[i]][0]), + subSize); + } + dataReferenceCount[blk->data_ptr[i]]--; + dataReferenceCount[new_data]++; + blk->data_ptr[i] = new_data; + } + } +#endif +} + +void +IIC::cleanupRefs() +{ + for (int i = 0; i < numTags; ++i) { + if (tagStore[i].isValid()) { + totalRefs += tagStore[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh new file mode 100644 index 000000000..6628f7e7a --- /dev/null +++ b/src/mem/cache/tags/iic.hh @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of the Indirect Index Cache (IIC) tags store. + */ + +#ifndef __IIC_HH__ +#define __IIC_HH__ + +#include <list> +#include <vector> + +#include "mem/cache/cache_blk.hh" +#include "mem/cache/tags/repl/repl.hh" +#include "mem/packet.hh" +#include "base/statistics.hh" +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; // Forward declaration + +/** + * IIC cache blk. + */ +class IICTag : public CacheBlk +{ + public: + /** + * Copy the contents of the given IICTag into this one. + * @param rhs The tag to copy. + * @return const reference to this tag. + */ + const IICTag& operator=(const IICTag& rhs) + { + CacheBlk::operator=(rhs); + chain_ptr = rhs.chain_ptr; + re = rhs.re; + set = rhs.set; + trivialData = rhs.trivialData; + numData = rhs.numData; + data_ptr.clear(); + for (int i = 0; i < rhs.numData; ++i) { + data_ptr.push_back(rhs.data_ptr[i]); + } + return *this; + } + + /** Hash chain pointer into secondary store. */ + unsigned long chain_ptr; + /** Data array pointers for each subblock. */ + std::vector<unsigned long> data_ptr; + /** Replacement Entry pointer. */ + void *re; + /** + * An array to store small compressed data. Conceputally the same size + * as the unsused data array pointers. + */ + uint8_t *trivialData; + /** + * The number of allocated subblocks. + */ + int numData; +}; + +/** + * A hash set for the IIC primary lookup table. + */ +class IICSet{ + public: + /** The associativity of the primary table. */ + int assoc; + + /** The number of hash chains followed when finding the last block. */ + int depth; + /** The current number of blocks on the chain. */ + int size; + + /** Tag pointer into the secondary tag storage. */ + unsigned long chain_ptr; + + /** The LRU list of the primary table. MRU is at 0 index. */ + IICTag ** tags; + + /** + * Find the addr in this set, return the chain pointer to the secondary if + * it isn't found. + * @param asid The address space ID. + * @param tag The address to find. + * @param chain_ptr The chain pointer to start the search of the secondary + * @return Pointer to the tag, NULL if not found. + */ + IICTag* findTag(int asid, Addr tag, unsigned long &chain_ptr) + { + depth = 1; + for (int i = 0; i < assoc; ++i) { + if (tags[i]->tag == tag && tags[i]->isValid()) { + return tags[i]; + } + } + chain_ptr = this->chain_ptr; + return 0; + } + + /** + * Find an usused tag in this set. + * @return Pointer to the unused tag, NULL if none are free. + */ + IICTag* findFree() + { + for (int i = 0; i < assoc; ++i) { + if (!tags[i]->isValid()) { + return tags[i]; + } + } + return 0; + } + + /** + * Move a tag to the head of the LRU list + * @param tag The tag to move. + */ + void moveToHead(IICTag *tag); + + /** + * Move a tag to the tail (LRU) of the LRU list + * @param tag The tag to move. + */ + void moveToTail(IICTag *tag); +}; + +/** + * The IIC tag store. This is a hardware-realizable, fully-associative tag + * store that uses software replacement, e.g. Gen. + */ +class IIC : public BaseTags +{ + public: + /** Typedef of the block type used in this class. */ + typedef IICTag BlkType; + /** Typedef for list of pointers to the local block type. */ + typedef std::list<IICTag*> BlkList; + protected: + /** The number of set in the primary table. */ + const int hashSets; + /** The block size in bytes. */ + const int blkSize; + /** The associativity of the primary table. */ + const int assoc; + /** The base hit latency. */ + const int hitLatency; + /** The subblock size, used for compression. */ + const int subSize; + + /** The number of subblocks */ + const int numSub; + /** The number of bytes used by data pointers */ + const int trivialSize; + + /** The amount to shift address to get the tag. */ + const int tagShift; + /** The mask to get block offset bits. */ + const unsigned blkMask; + + /** The amount to shift to get the subblock number. */ + const int subShift; + /** The mask to get the correct subblock number. */ + const unsigned subMask; + + /** The latency of a hash lookup. */ + const int hashDelay; + /** The number of data blocks. */ + const int numBlocks; + /** The total number of tags in primary and secondary. */ + const int numTags; + /** The number of tags in the secondary tag store. */ + const int numSecondary; + + /** The Null tag pointer. */ + const int tagNull; + /** The last tag in the primary table. */ + const int primaryBound; + + /** All of the tags */ + IICTag *tagStore; + /** + * Pointer to the head of the secondary freelist (maintained with chain + * pointers. + */ + unsigned long freelist; + /** + * The data block freelist. + */ + std::list<unsigned long> blkFreelist; + + /** The primary table. */ + IICSet *sets; + + /** The replacement policy. */ + Repl *repl; + + /** An array of data reference counters. */ + int *dataReferenceCount; + + /** The data blocks. */ + uint8_t *dataStore; + + /** Storage for the fast access data of each cache block. */ + uint8_t **dataBlks; + + /** + * Count of the current number of free secondary tags. + * Used for debugging. + */ + int freeSecond; + + // IIC Statistics + /** + * @addtogroup IICStatistics IIC Statistics + * @{ + */ + + /** Hash hit depth of cache hits. */ + Stats::Distribution<> hitHashDepth; + /** Hash depth for cache misses. */ + Stats::Distribution<> missHashDepth; + /** Count of accesses to each hash set. */ + Stats::Distribution<> setAccess; + + /** The total hash depth for every miss. */ + Stats::Scalar<> missDepthTotal; + /** The total hash depth for all hits. */ + Stats::Scalar<> hitDepthTotal; + /** The number of hash misses. */ + Stats::Scalar<> hashMiss; + /** The number of hash hits. */ + Stats::Scalar<> hashHit; + /** @} */ + + public: + /** + * Collection of parameters for the IIC. + */ + class Params { + public: + /** The size in bytes of the cache. */ + int size; + /** The number of sets in the primary table. */ + int numSets; + /** The block size in bytes. */ + int blkSize; + /** The associativity of the primary table. */ + int assoc; + /** The number of cycles for each hash lookup. */ + int hashDelay; + /** The number of cycles to read the data. */ + int hitLatency; + /** The replacement policy. */ + Repl *rp; + /** The subblock size in bytes. */ + int subblockSize; + }; + + /** + * Construct and initialize this tag store. + * @param params The IIC parameters. + * @todo + * Should make a way to have less tags in the primary than blks in the + * cache. Also should be able to specify number of secondary blks. + */ + IIC(Params ¶ms); + + /** + * Destructor. + */ + virtual ~IIC(); + + /** + * Register the statistics. + * @param name The name to prepend to the statistic descriptions. + */ + void regStats(const std::string &name); + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set Not needed for the iic. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) { + return (((Addr)tag << tagShift)); + } + + /** + * Return the block size. + * @return The block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. + * @return The subblock size. + */ + int getSubBlockSize() + { + return subSize; + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Generate the tag from the address. + * @param addr The address to a get a tag for. + * @param blk Ignored here. + * @return the tag. + */ + Addr extractTag(Addr addr, IICTag *blk) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the address. + * @param addr The address to a get a tag for. + * @return the tag. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Return the set, always 0 for IIC. + * @return 0. + */ + int extractSet(Addr addr) const + { + return 0; + } + + /** + * Get the block offset of an address. + * @param addr The address to get the offset of. + * @return the block offset of the address. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Check for the address in the tagstore. + * @param asid The address space ID. + * @param addr The address to find. + * @return true if it is found. + */ + bool probe(int asid, Addr addr) const; + + /** + * Swap the position of two tags. + * @param index1 The first tag location. + * @param index2 The second tag location. + */ + void tagSwap(unsigned long index1, unsigned long index2); + + /** + * Clear the reference bit of the tag and return its old value. + * @param index The pointer of the tag to manipulate. + * @return The previous state of the reference bit. + */ + bool clearRef(unsigned long index) + { + bool tmp = tagStore[index].isReferenced(); + tagStore[index].status &= ~BlkReferenced; + return tmp; + } + + /** + * Decompress a block if it is compressed. + * @param index The tag store index for the block to uncompress. + */ + void decompressBlock(unsigned long index); + + /** + * Try and compress a block if it is not already compressed. + * @param index The tag store index for the block to compress. + */ + void compressBlock(unsigned long index); + + /** + * Invalidate the block containing the address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Find the block and update the replacement data. This call also returns + * the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Addr addr, int asid, int &lat); + + /** + * Find the block and update the replacement data. This call also returns + * the access latency as a side effect. + * @param req The req whose block to find + * @param lat The access latency. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Packet * &pkt, int &lat); + + /** + * Find the block, do not update the replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + IICTag* findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks); + + /** + * Read the data from the internal storage of the given cache block. + * @param blk The block to read the data from. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(IICTag *blk, uint8_t *data); + + /** + * Write the data into the internal storage of the given cache block. + * @param blk The block to write to. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(IICTag *blk, uint8_t *data, int size, + PacketList & writebacks); + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); + + /** + * If a block is currently marked copy on write, copy it before writing. + * @param req The write request. + * @param writebacks List for any generated writeback requests. + */ + void fixCopy(Packet * &pkt, PacketList &writebacks); + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +private: + /** + * Return the hash of the address. + * @param addr The address to hash. + * @return the hash of the address. + */ + unsigned hash(Addr addr) const; + + /** + * Search for a block in the secondary tag store. Returns the number of + * hash lookups as a side effect. + * @param asid The address space ID. + * @param tag The tag to match. + * @param chain_ptr The first entry to search. + * @param depth The number of hash lookups made while searching. + * @return A pointer to the block if found. + */ + IICTag *secondaryChain(int asid, Addr tag, unsigned long chain_ptr, + int *depth) const; + + /** + * Free the resources associated with the next replacement block. + * @param writebacks A list of any writebacks to perform. + */ + void freeReplacementBlock(PacketList & writebacks); + + /** + * Return the pointer to a free data block. + * @param writebacks A list of any writebacks to perform. + * @return A pointer to a free data block. + */ + unsigned long getFreeDataBlock(PacketList & writebacks); + + /** + * Get a free tag in the given hash set. + * @param set The hash set to search. + * @param writebacks A list of any writebacks to perform. + * @return a pointer to a free tag. + */ + IICTag* getFreeTag(int set, PacketList & writebacks); + + /** + * Free the resources associated with the given tag. + * @param tag_ptr The tag to free. + */ + void freeTag(IICTag *tag_ptr); + + /** + * Mark the given data block as being available. + * @param data_ptr The data block to free. + */ + void freeDataBlock(unsigned long data_ptr); +}; +#endif // __IIC_HH__ + diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc new file mode 100644 index 000000000..556025a3a --- /dev/null +++ b/src/mem/cache/tags/lru.cc @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of LRU tag store. + */ + +#include <string> + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/lru.hh" +#include "sim/root.hh" + +using namespace std; + +LRUBlk* +CacheSet::findBlk(int asid, Addr tag) const +{ + for (int i = 0; i < assoc; ++i) { + if (blks[i]->tag == tag && blks[i]->isValid()) { + return blks[i]; + } + } + return 0; +} + + +void +CacheSet::moveToHead(LRUBlk *blk) +{ + // nothing to do if blk is already head + if (blks[0] == blk) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + LRUBlk *next = blk; + + do { + assert(i < assoc); + // swap blks[i] and next + LRUBlk *tmp = blks[i]; + blks[i] = next; + next = tmp; + ++i; + } while (next != blk); +} + + +// create and initialize a LRU/MRU cache structure +LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) : + numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency) +{ + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (numSets <= 0 || !isPowerOf2(numSets)) { + fatal("# of sets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + + LRUBlk *blk; + int i, j, blkIndex; + + blkMask = blkSize - 1; + setShift = floorLog2(blkSize); + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = numSets * assoc; + + sets = new CacheSet[numSets]; + blks = new LRUBlk[numSets * assoc]; + // allocate data storage in one big chunk + dataBlks = new uint8_t[numSets*assoc*blkSize]; + + blkIndex = 0; // index into blks array + for (i = 0; i < numSets; ++i) { + sets[i].assoc = assoc; + + sets[i].blks = new LRUBlk*[assoc]; + + // link in the data blocks + for (j = 0; j < assoc; ++j) { + // locate next cache block + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + ++blkIndex; + + // invalidate new cache block + blk->status = 0; + + //EGH Fix Me : do we need to initialize blk? + + // Setting the tag to j is just to prevent long chains in the hash + // table; won't matter because the block is invalid + blk->tag = j; + blk->whenReady = 0; + blk->asid = -1; + blk->isTouched = false; + blk->size = blkSize; + sets[i].blks[j]=blk; + blk->set = i; + } + } +} + +LRU::~LRU() +{ + delete [] dataBlks; + delete [] blks; + delete [] sets; +} + +// probe cache for presence of given block. +bool +LRU::probe(int asid, Addr addr) const +{ + // return(findBlock(Read, addr, asid) != 0); + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + LRUBlk *blk = sets[myset].findBlk(asid, tag); + + return (blk != NULL); // true if in cache +} + +LRUBlk* +LRU::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick + && blk->whenReady - curTick > hitLatency) { + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + } + + return blk; +} + +LRUBlk* +LRU::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->getAddr(); + int asid = 0;//pkt->req->getAsid(); + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick + && blk->whenReady - curTick > hitLatency) { + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + } + + return blk; +} + +LRUBlk* +LRU::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + return blk; +} + +LRUBlk* +LRU::findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->getAddr()); + // grab a replacement candidate + LRUBlk *blk = sets[set].blks[assoc-1]; + sets[set].moveToHead(blk); + if (blk->isValid()) { + replacements[0]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else if (!blk->isTouched) { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + return blk; +} + +void +LRU::invalidateBlk(int asid, Addr addr) +{ + LRUBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +void +LRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + LRUBlk *source_blk = findBlock(source, asid); + assert(source_blk); + LRUBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Request *search = new Request(dest,1,0); + Packet * pkt = new Packet(search, Packet::ReadReq, -1); + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. +/* pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + dest_blk->data, + dest_blk->size); +*/ + Request *writebackReq = new Request(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + blkSize, 0); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->allocate(); + memcpy(writeback->getPtr<uint8_t>(),dest_blk->data, blkSize); + writebacks.push_back(writeback); + } + dest_blk->tag = extractTag(dest); + dest_blk->asid = asid; + delete search; + delete pkt; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. + dest_blk->status = 7; + memcpy(dest_blk->data, source_blk->data, blkSize); +} + +void +LRU::cleanupRefs() +{ + for (int i = 0; i < numSets*assoc; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh new file mode 100644 index 000000000..437244660 --- /dev/null +++ b/src/mem/cache/tags/lru.hh @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a LRU tag store. + */ + +#ifndef __LRU_HH__ +#define __LRU_HH__ + +#include <list> + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/packet.hh" // for inlined functions +#include <assert.h> +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * LRU cache block. + */ +class LRUBlk : public CacheBlk { + public: + /** Has this block been touched? Used to aid calculation of warmup time. */ + bool isTouched; +}; + +/** + * An associative set of cache blocks. + */ +class CacheSet +{ + public: + /** The associativity of this set. */ + int assoc; + + /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ + LRUBlk **blks; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag The Tag to find. + * @return Pointer to the block if found. + */ + LRUBlk* findBlk(int asid, Addr tag) const; + + /** + * Move the given block to the head of the list. + * @param blk The block to move. + */ + void moveToHead(LRUBlk *blk); +}; + +/** + * A LRU cache tag store. + */ +class LRU : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef LRUBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list<LRUBlk*> BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** The associativity of the cache. */ + const int assoc; + /** The hit latency. */ + const int hitLatency; + + /** The cache sets. */ + CacheSet *sets; + + /** The cache blocks. */ + LRUBlk *blks; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency); + + /** + * Destructor + */ + virtual ~LRU(); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LRU it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The request whose block to find. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + LRUBlk* findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored. + * @return The tag of the address. + */ + Addr extractTag(Addr addr, LRUBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(LRUBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LRU does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(LRUBlk *blk, uint8_t *data, int size, + PacketList & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/repl/gen.cc b/src/mem/cache/tags/repl/gen.cc new file mode 100644 index 000000000..ec1c2aaf3 --- /dev/null +++ b/src/mem/cache/tags/repl/gen.cc @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + */ + +/** + * @file + * Definitions of the Generational replacement policy. + */ + +#include <string> + +#include "base/misc.hh" +#include "mem/cache/tags/iic.hh" +#include "mem/cache/tags/repl/gen.hh" +#include "sim/builder.hh" +#include "sim/host.hh" + +using namespace std; + +GenRepl::GenRepl(const string &_name, + int _num_pools, + int _fresh_res, + int _pool_res) // fix this, should be set by cache + : Repl(_name) +{ + num_pools = _num_pools; + fresh_res = _fresh_res; + pool_res = _pool_res; + num_entries = 0; + num_pool_entries = 0; + misses = 0; + pools = new GenPool[num_pools+1]; +} + +GenRepl::~GenRepl() +{ + delete [] pools; +} + +unsigned long +GenRepl::getRepl() +{ + unsigned long tmp; + GenReplEntry *re; + int i; + int num_seen = 0; + if (!(num_pool_entries>0)) { + fatal("No blks available to replace"); + } + num_entries--; + num_pool_entries--; + for (i = 0; i < num_pools; i++) { + while ((re = pools[i].pop())) { + num_seen++; + // Remove invalidated entries + if (!re->valid) { + delete re; + continue; + } + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + } + else { + tmp = re->tag_ptr; + delete re; + + repl_pool.sample(i); + + return tmp; + } + } + } + fatal("No replacement found"); + return 0xffffffff; +} + +unsigned long * +GenRepl::getNRepl(int n) +{ + unsigned long *tmp; + GenReplEntry *re; + int i; + if (!(num_pool_entries>(n-1))) { + fatal("Not enough blks available to replace"); + } + num_entries -= n; + num_pool_entries -= n; + tmp = new unsigned long[n]; /* array of cache_blk pointers */ + int blk_index = 0; + for (i = 0; i < num_pools && blk_index < n; i++) { + while (blk_index < n && (re = pools[i].pop())) { + // Remove invalidated entries + if (!re->valid) { + delete re; + continue; + } + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + } + else { + tmp[blk_index] = re->tag_ptr; + blk_index++; + delete re; + repl_pool.sample(i); + } + } + } + if (blk_index >= n) + return tmp; + /* search the fresh pool */ + + fatal("No N replacements found"); + return NULL; +} + +void +GenRepl::doAdvance(std::list<unsigned long> &demoted) +{ + int i; + int num_seen = 0; + GenReplEntry *re; + misses++; + for (i=0; i<num_pools; i++) { + while (misses-pools[i].oldest > pool_res && (re = pools[i].pop())!=NULL) { + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + /** @todo Not really demoted, but use it for now. */ + demoted.push_back(re->tag_ptr); + advance_pool.sample(i); + } + else { + pools[(((i-1)<0)?i:i-1)].push(re, misses); + demoted.push_back(re->tag_ptr); + demote_pool.sample(i); + } + } + num_seen += pools[i].size; + } + while (misses-pools[num_pools].oldest > fresh_res + && (re = pools[num_pools].pop())!=NULL) { + num_pool_entries++; + if (iic->clearRef(re->tag_ptr)) { + pools[num_pools/2].push(re, misses); + /** @todo Not really demoted, but use it for now. */ + demoted.push_back(re->tag_ptr); + advance_pool.sample(num_pools); + } + else { + pools[num_pools/2-1].push(re, misses); + demoted.push_back(re->tag_ptr); + demote_pool.sample(num_pools); + } + } +} + +void* +GenRepl::add(unsigned long tag_index) +{ + GenReplEntry *re = new GenReplEntry; + re->tag_ptr = tag_index; + re->valid = true; + pools[num_pools].push(re, misses); + num_entries++; + return (void*)re; +} + +void +GenRepl::regStats(const string name) +{ + using namespace Stats; + + /** GEN statistics */ + repl_pool + .init(0, 16, 1) + .name(name + ".repl_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; + + advance_pool + .init(0, 16, 1) + .name(name + ".advance_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; + + demote_pool + .init(0, 16, 1) + .name(name + ".demote_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; +} + +int +GenRepl::fixTag(void* _re, unsigned long old_index, unsigned long new_index) +{ + GenReplEntry *re = (GenReplEntry*)_re; + assert(re->valid); + if (re->tag_ptr == old_index) { + re->tag_ptr = new_index; + return 1; + } + fatal("Repl entry: tag ptrs do not match"); + return 0; +} + +bool +GenRepl::findTagPtr(unsigned long index) +{ + for (int i = 0; i < num_pools + 1; ++i) { + list<GenReplEntry*>::const_iterator iter = pools[i].entries.begin(); + list<GenReplEntry*>::const_iterator end = pools[i].entries.end(); + for (; iter != end; ++iter) { + if ((*iter)->valid && (*iter)->tag_ptr == index) { + return true; + } + } + } + return false; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(GenRepl) + + Param<int> num_pools; + Param<int> fresh_res; + Param<int> pool_res; + +END_DECLARE_SIM_OBJECT_PARAMS(GenRepl) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(GenRepl) + + INIT_PARAM(num_pools, "capacity in bytes"), + INIT_PARAM(fresh_res, "associativity"), + INIT_PARAM(pool_res, "block size in bytes") + +END_INIT_SIM_OBJECT_PARAMS(GenRepl) + + +CREATE_SIM_OBJECT(GenRepl) +{ + return new GenRepl(getInstanceName(), num_pools, fresh_res, pool_res); +} + +REGISTER_SIM_OBJECT("GenRepl", GenRepl) + +#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/tags/repl/gen.hh b/src/mem/cache/tags/repl/gen.hh new file mode 100644 index 000000000..c1ceb3f4e --- /dev/null +++ b/src/mem/cache/tags/repl/gen.hh @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declarations of generational replacement policy + */ + +#ifndef ___GEN_HH__ +#define __GEN_HH__ + +#include <list> + +#include "base/statistics.hh" +#include "mem/cache/tags/repl/repl.hh" + +/** + * Generational Replacement entry. + */ +class GenReplEntry +{ + public: + /** Valid flag, used to quickly invalidate bogus entries. */ + bool valid; + /** The difference between this entry and the previous in the pool. */ + int delta; + /** Pointer to the corresponding tag in the IIC. */ + unsigned long tag_ptr; +}; + +/** + * Generational replacement pool + */ +class GenPool +{ + public: + /** The time the last entry was added. */ + Tick newest; + /** The time the oldest entry was added. */ + Tick oldest; + /** List of the replacement entries in this pool. */ + std::list<GenReplEntry*> entries; + + /** The number of entries in this pool. */ + int size; + + /** + * Simple constructor. + */ + GenPool() { + newest = 0; + oldest = 0; + size = 0; + } + + /** + * Add an entry to this pool. + * @param re The entry to add. + * @param now The current time. + */ + void push(GenReplEntry *re, Tick now) { + ++size; + if (!entries.empty()) { + re->delta = now - newest; + newest = now; + } else { + re->delta = 0; + newest = oldest = now; + } + entries.push_back(re); + } + + /** + * Remove an entry from the pool. + * @return The entry at the front of the list. + */ + GenReplEntry* pop() { + GenReplEntry *tmp = NULL; + if (!entries.empty()) { + --size; + tmp = entries.front(); + entries.pop_front(); + oldest += tmp->delta; + } + return tmp; + } + + /** + * Return the entry at the front of the list. + * @return the entry at the front of the list. + */ + GenReplEntry* top() { + return entries.front(); + } + + /** + * Destructor. + */ + ~GenPool() { + while (!entries.empty()) { + GenReplEntry *tmp = entries.front(); + entries.pop_front(); + delete tmp; + } + } +}; + +/** + * Generational replacement policy for use with the IIC. + * @todo update to use STL and for efficiency + */ +class GenRepl : public Repl +{ + public: + /** The array of pools. */ + GenPool *pools; + /** The number of pools. */ + int num_pools; + /** The amount of time to stay in the fresh pool. */ + int fresh_res; + /** The amount of time to stay in the normal pools. */ + int pool_res; + /** The maximum number of entries */ + int num_entries; + /** The number of entries currently in the pools. */ + int num_pool_entries; + /** The number of misses. Used as the internal time. */ + Tick misses; + + // Statistics + + /** + * @addtogroup CacheStatistics + * @{ + */ + /** The number of replacements from each pool. */ + Stats::Distribution<> repl_pool; + /** The number of advances out of each pool. */ + Stats::Distribution<> advance_pool; + /** The number of demotions from each pool. */ + Stats::Distribution<> demote_pool; + /** + * @} + */ + + /** + * Constructs and initializes this replacement policy. + * @param name The name of the policy. + * @param num_pools The number of pools to use. + * @param fresh_res The amount of time to wait in the fresh pool. + * @param pool_res The amount of time to wait in the normal pools. + */ + GenRepl(const std::string &name, int num_pools, + int fresh_res, int pool_res); + + /** + * Destructor. + */ + ~GenRepl(); + + /** + * Returns the tag pointer of the cache block to replace. + * @return The tag to replace. + */ + virtual unsigned long getRepl(); + + /** + * Return an array of N tag pointers to replace. + * @param n The number of tag pointer to return. + * @return An array of tag pointers to replace. + */ + virtual unsigned long *getNRepl(int n); + + /** + * Update replacement data + */ + virtual void doAdvance(std::list<unsigned long> &demoted); + + /** + * Add a tag to the replacement policy and return a pointer to the + * replacement entry. + * @param tag_index The tag to add. + * @return The replacement entry. + */ + virtual void* add(unsigned long tag_index); + + /** + * Register statistics. + * @param name The name to prepend to statistic descriptions. + */ + virtual void regStats(const std::string name); + + /** + * Update the tag pointer to when the tag moves. + * @param re The replacement entry of the tag. + * @param old_index The old tag pointer. + * @param new_index The new tag pointer. + * @return 1 if successful, 0 otherwise. + */ + virtual int fixTag(void *re, unsigned long old_index, + unsigned long new_index); + + /** + * Remove this entry from the replacement policy. + * @param re The replacement entry to remove + */ + virtual void removeEntry(void *re) + { + ((GenReplEntry*)re)->valid = false; + } + + protected: + /** + * Debug function to verify that there is only one repl entry per tag. + * @param index The tag index to check. + */ + bool findTagPtr(unsigned long index); +}; + +#endif /* __GEN_HH__ */ diff --git a/src/mem/cache/tags/repl/repl.cc b/src/mem/cache/tags/repl/repl.cc new file mode 100644 index 000000000..ce781eb9f --- /dev/null +++ b/src/mem/cache/tags/repl/repl.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Nathan Binkert + */ + +/** + * Definitions of the base replacement class. + */ + +#include "sim/param.hh" +#include "mem/cache/tags/repl/repl.hh" + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +DEFINE_SIM_OBJECT_CLASS_NAME("Repl", Repl) + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/tags/repl/repl.hh b/src/mem/cache/tags/repl/repl.hh new file mode 100644 index 000000000..7c289a5c1 --- /dev/null +++ b/src/mem/cache/tags/repl/repl.hh @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Nathan Binkert + */ + +/** + * @file + * Declaration of a base replacement policy class. + */ + +#ifndef __REPL_HH__ +#define __REPL_HH__ + +#include <string> +#include <list> + +#include "cpu/smt.hh" +#include "sim/host.hh" +#include "sim/sim_object.hh" + + +class IIC; + +/** + * A pure virtual base class that defines the interface of a replacement + * policy. + */ +class Repl : public SimObject +{ + public: + /** Pointer to the IIC using this policy. */ + IIC *iic; + + /** + * Construct and initialize this polixy. + * @param name The instance name of this policy. + */ + Repl (const std::string &name) + : SimObject(name) + { + iic = NULL; + } + + /** + * Set the back pointer to the IIC. + * @param iic_ptr Pointer to the IIC. + */ + void setIIC(IIC *iic_ptr) + { + iic = iic_ptr; + } + + /** + * Returns the tag pointer of the cache block to replace. + * @return The tag to replace. + */ + virtual unsigned long getRepl() = 0; + + /** + * Return an array of N tag pointers to replace. + * @param n The number of tag pointer to return. + * @return An array of tag pointers to replace. + */ + virtual unsigned long *getNRepl(int n) = 0; + + /** + * Update replacement data + */ + virtual void doAdvance(std::list<unsigned long> &demoted) = 0; + + /** + * Add a tag to the replacement policy and return a pointer to the + * replacement entry. + * @param tag_index The tag to add. + * @return The replacement entry. + */ + virtual void* add(unsigned long tag_index) = 0; + + /** + * Register statistics. + * @param name The name to prepend to statistic descriptions. + */ + virtual void regStats(const std::string name) = 0; + + /** + * Update the tag pointer to when the tag moves. + * @param re The replacement entry of the tag. + * @param old_index The old tag pointer. + * @param new_index The new tag pointer. + * @return 1 if successful, 0 otherwise. + */ + virtual int fixTag(void *re, unsigned long old_index, + unsigned long new_index) = 0; + + /** + * Remove this entry from the replacement policy. + * @param re The replacement entry to remove + */ + virtual void removeEntry(void *re) = 0; +}; + +#endif /* SMT_REPL_HH */ diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc new file mode 100644 index 000000000..bf23fb8cb --- /dev/null +++ b/src/mem/cache/tags/split.cc @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of split cache tag store. + */ + +#include <string> +#include <iostream> +#include <fstream> + +#include "base/cprintf.hh" +#include "base/intmath.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "mem/cache/base_cache.hh" +#include "mem/cache/tags/split.hh" +#include "mem/cache/tags/split_lifo.hh" +#include "mem/cache/tags/split_lru.hh" + + +using namespace std; +using namespace TheISA; + +// create and initialize a partitioned cache structure +Split::Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, + bool _lifo, bool _two_queue, int _hit_latency) : + numSets(_numSets), blkSize(_blkSize), lifo(_lifo), hitLatency(_hit_latency) +{ + DPRINTF(Split, "new split cache!!\n"); + + DPRINTF(Split, "lru has %d numSets, %d blkSize, %d assoc, and %d hit_latency\n", + numSets, blkSize, LRU1_assoc, hitLatency); + + lru = new SplitLRU(_numSets, _blkSize, LRU1_assoc, _hit_latency, 1); + + if (total_ways - LRU1_assoc == 0) { + lifo_net = NULL; + lru_net = NULL; + } else { + if (lifo) { + DPRINTF(Split, "Other partition is a LIFO with size %d in bytes. it gets %d ways\n", + (total_ways - LRU1_assoc)*_numSets*_blkSize, (total_ways - LRU1_assoc)); + lifo_net = new SplitLIFO(_blkSize, (total_ways - LRU1_assoc)*_numSets*_blkSize, + (total_ways - LRU1_assoc), _hit_latency, _two_queue, 2); + lru_net = NULL; + } + else { + DPRINTF(Split, "other LRU gets %d ways\n", total_ways - LRU1_assoc); + lru_net = new SplitLRU(_numSets, _blkSize, total_ways - LRU1_assoc, _hit_latency, 2); + lifo_net = NULL; + } + } + + blkMask = blkSize - 1; + + if (!isPowerOf2(total_ways)) + warn("total cache ways/columns %d should be power of 2", + total_ways); + + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = numSets * total_ways; + +} + +Split::~Split() +{ + delete lru; + if (lifo) + delete lifo_net; + else + delete lru_net; +} + +void +Split::regStats(const string &name) +{ + using namespace Stats; + + BaseTags::regStats(name); + + usedEvictDist.init(0,3000,40); + unusedEvictDist.init(0,3000,40); + useByCPUCycleDist.init(0,35,1); + + nic_repl + .name(name + ".nic_repl") + .desc("number of replacements in the nic partition") + .precision(0) + ; + + cpu_repl + .name(name + ".cpu_repl") + .desc("number of replacements in the cpu partition") + .precision(0) + ; + + lru->regStats(name + ".lru"); + + if (lifo && lifo_net) { + lifo_net->regStats(name + ".lifo_net"); + } else if (lru_net) { + lru_net->regStats(name + ".lru_net"); + } + + nicUsedWhenEvicted + .name(name + ".nicUsedWhenEvicted") + .desc("number of NIC blks that were used before evicted") + ; + + nicUsedTotLatency + .name(name + ".nicUsedTotLatency") + .desc("total cycles before eviction of used NIC blks") + ; + + nicUsedTotEvicted + .name(name + ".nicUsedTotEvicted") + .desc("total number of used NIC blks evicted") + ; + + nicUsedAvgLatency + .name(name + ".nicUsedAvgLatency") + .desc("avg number of cycles a used NIC blk is in cache") + .precision(0) + ; + nicUsedAvgLatency = nicUsedTotLatency / nicUsedTotEvicted; + + usedEvictDist + .name(name + ".usedEvictDist") + .desc("distribution of used NIC blk eviction times") + .flags(pdf | cdf) + ; + + nicUnusedWhenEvicted + .name(name + ".nicUnusedWhenEvicted") + .desc("number of NIC blks that were unused when evicted") + ; + + nicUnusedTotLatency + .name(name + ".nicUnusedTotLatency") + .desc("total cycles before eviction of unused NIC blks") + ; + + nicUnusedTotEvicted + .name(name + ".nicUnusedTotEvicted") + .desc("total number of unused NIC blks evicted") + ; + + nicUnusedAvgLatency + .name(name + ".nicUnusedAvgLatency") + .desc("avg number of cycles an unused NIC blk is in cache") + .precision(0) + ; + nicUnusedAvgLatency = nicUnusedTotLatency / nicUnusedTotEvicted; + + unusedEvictDist + .name(name + ".unusedEvictDist") + .desc("distribution of unused NIC blk eviction times") + .flags(pdf | cdf) + ; + + nicUseByCPUCycleTotal + .name(name + ".nicUseByCPUCycleTotal") + .desc("total latency of NIC blks til usage time") + ; + + nicBlksUsedByCPU + .name(name + ".nicBlksUsedByCPU") + .desc("total number of NIC blks used") + ; + + nicAvgUsageByCPULatency + .name(name + ".nicAvgUsageByCPULatency") + .desc("average number of cycles before a NIC blk that is used gets used") + .precision(0) + ; + nicAvgUsageByCPULatency = nicUseByCPUCycleTotal / nicBlksUsedByCPU; + + useByCPUCycleDist + .name(name + ".useByCPUCycleDist") + .desc("the distribution of cycle time in cache before NIC blk is used") + .flags(pdf | cdf) + ; + + cpuUsedBlks + .name(name + ".cpuUsedBlks") + .desc("number of cpu blks that were used before evicted") + ; + + cpuUnusedBlks + .name(name + ".cpuUnusedBlks") + .desc("number of cpu blks that were unused before evicted") + ; + + nicAvgLatency + .name(name + ".nicAvgLatency") + .desc("avg number of cycles a NIC blk is in cache before evicted") + .precision(0) + ; + nicAvgLatency = (nicUnusedTotLatency + nicUsedTotLatency) / + (nicUnusedTotEvicted + nicUsedTotEvicted); + + NR_CP_hits + .name(name + ".NR_CP_hits") + .desc("NIC requests hitting in CPU Partition") + ; + + NR_NP_hits + .name(name + ".NR_NP_hits") + .desc("NIC requests hitting in NIC Partition") + ; + + CR_CP_hits + .name(name + ".CR_CP_hits") + .desc("CPU requests hitting in CPU partition") + ; + + CR_NP_hits + .name(name + ".CR_NP_hits") + .desc("CPU requests hitting in NIC partition") + ; + +} + +// probe cache for presence of given block. +bool +Split::probe(int asid, Addr addr) const +{ + bool success = lru->probe(asid, addr); + if (!success) { + if (lifo && lifo_net) + success = lifo_net->probe(asid, addr); + else if (lru_net) + success = lru_net->probe(asid, addr); + } + + return success; +} + +SplitBlk* +Split::findBlock(Packet * &pkt, int &lat) +{ + + Addr aligned = blkAlign(pkt->getAddr()); + + if (memHash.count(aligned)) { + memHash[aligned]++; + } else if (pkt->nic_pkt()) { + memHash[aligned] = 1; + } + + SplitBlk *blk = lru->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); + if (blk) { + if (pkt->nic_pkt()) { + NR_CP_hits++; + } else { + CR_CP_hits++; + } + } else { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); + + } else if (lru_net) { + blk = lru_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); + } + if (blk) { + if (pkt->nic_pkt()) { + NR_NP_hits++; + } else { + CR_NP_hits++; + } + } + } + + if (blk) { + Tick latency = curTick - blk->ts; + if (blk->isNIC) { + if (!blk->isUsed && !pkt->nic_pkt()) { + useByCPUCycleDist.sample(latency); + nicUseByCPUCycleTotal += latency; + nicBlksUsedByCPU++; + } + } + blk->isUsed = true; + + if (pkt->nic_pkt()) { + DPRINTF(Split, "found block in partition %d\n", blk->part); + } + } + return blk; +} + +SplitBlk* +Split::findBlock(Addr addr, int asid, int &lat) +{ + SplitBlk *blk = lru->findBlock(addr, asid, lat); + if (!blk) { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(addr, asid, lat); + } else if (lru_net) { + blk = lru_net->findBlock(addr, asid, lat); + } + } + + return blk; +} + +SplitBlk* +Split::findBlock(Addr addr, int asid) const +{ + SplitBlk *blk = lru->findBlock(addr, asid); + if (!blk) { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(addr, asid); + } else if (lru_net) { + blk = lru_net->findBlock(addr, asid); + } + } + + return blk; +} + +SplitBlk* +Split::findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks) +{ + SplitBlk *blk; + + if (pkt->nic_pkt()) { + DPRINTF(Split, "finding a replacement for nic_req\n"); + nic_repl++; + if (lifo && lifo_net) + blk = lifo_net->findReplacement(pkt, writebacks, + compress_blocks); + else if (lru_net) + blk = lru_net->findReplacement(pkt, writebacks, + compress_blocks); + // in this case, this is an LRU only cache, it's non partitioned + else + blk = lru->findReplacement(pkt, writebacks, compress_blocks); + } else { + DPRINTF(Split, "finding replacement for cpu_req\n"); + blk = lru->findReplacement(pkt, writebacks, + compress_blocks); + cpu_repl++; + } + + Tick latency = curTick - blk->ts; + if (blk->isNIC) { + if (blk->isUsed) { + nicUsedWhenEvicted++; + usedEvictDist.sample(latency); + nicUsedTotLatency += latency; + nicUsedTotEvicted++; + } else { + nicUnusedWhenEvicted++; + unusedEvictDist.sample(latency); + nicUnusedTotLatency += latency; + nicUnusedTotEvicted++; + } + } else { + if (blk->isUsed) { + cpuUsedBlks++; + } else { + cpuUnusedBlks++; + } + } + + // blk attributes for the new blk coming IN + blk->ts = curTick; + blk->isNIC = (pkt->nic_pkt()) ? true : false; + + return blk; +} + +void +Split::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = lru->findBlock(addr, asid); + if (!blk) { + if (lifo && lifo_net) + blk = lifo_net->findBlock(addr, asid); + else if (lru_net) + blk = lru_net->findBlock(addr, asid); + + if (!blk) + return; + } + + blk->status = 0; + blk->isTouched = false; + tagsInUse--; +} + +void +Split::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) +{ + if (lru->probe(asid, source)) + lru->doCopy(source, dest, asid, writebacks); + else { + if (lifo && lifo_net) + lifo_net->doCopy(source, dest, asid, writebacks); + else if (lru_net) + lru_net->doCopy(source, dest, asid, writebacks); + } +} + +void +Split::cleanupRefs() +{ + lru->cleanupRefs(); + if (lifo && lifo_net) + lifo_net->cleanupRefs(); + else if (lru_net) + lru_net->cleanupRefs(); + + ofstream memPrint(simout.resolve("memory_footprint.txt").c_str(), + ios::trunc); + + // this shouldn't be here but it happens at the end, which is what i want + memIter end = memHash.end(); + for (memIter iter = memHash.begin(); iter != end; ++iter) { + ccprintf(memPrint, "%8x\t%d\n", (*iter).first, (*iter).second); + } +} + +Addr +Split::regenerateBlkAddr(Addr tag, int set) const +{ + if (lifo_net) + return lifo_net->regenerateBlkAddr(tag, set); + else + return lru->regenerateBlkAddr(tag, set); +} + +Addr +Split::extractTag(Addr addr, SplitBlk *blk) const +{ + if (blk->part == 2) { + if (lifo_net) + return lifo_net->extractTag(addr); + else if (lru_net) + return lru_net->extractTag(addr); + else + panic("this shouldn't happen"); + } else + return lru->extractTag(addr); +} + diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh new file mode 100644 index 000000000..5e0340269 --- /dev/null +++ b/src/mem/cache/tags/split.hh @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a split/partitioned tag store. + */ + +#ifndef __SPLIT_HH__ +#define __SPLIT_HH__ + +#include <list> + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include <assert.h> +#include "mem/cache/tags/base_tags.hh" +#include "base/hashmap.hh" + +class BaseCache; +class SplitLRU; +class SplitLIFO; + +/** + * A cache tag store. + */ +class Split : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list<SplitBlk*> BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** Whether the 2nd partition (for the nic) is LIFO or not */ + const bool lifo; + /** The hit latency. */ + const int hitLatency; + + Addr blkMask; + + /** Number of NIC requests that hit in the NIC partition */ + Stats::Scalar<> NR_NP_hits; + /** Number of NIC requests that hit in the CPU partition */ + Stats::Scalar<> NR_CP_hits; + /** Number of CPU requests that hit in the NIC partition */ + Stats::Scalar<> CR_NP_hits; + /** Number of CPU requests that hit in the CPU partition */ + Stats::Scalar<> CR_CP_hits; + /** The number of nic replacements (i.e. misses) */ + Stats::Scalar<> nic_repl; + /** The number of cpu replacements (i.e. misses) */ + Stats::Scalar<> cpu_repl; + + //For latency studies + /** the number of NIC blks that were used before evicted */ + Stats::Scalar<> nicUsedWhenEvicted; + /** the total latency of used NIC blocks in the cache */ + Stats::Scalar<> nicUsedTotLatency; + /** the total number of used NIC blocks evicted */ + Stats::Scalar<> nicUsedTotEvicted; + /** the average number of cycles a used NIC blk is in the cache */ + Stats::Formula nicUsedAvgLatency; + /** the Distribution of used NIC blk eviction times */ + Stats::Distribution<> usedEvictDist; + + /** the number of NIC blks that were unused before evicted */ + Stats::Scalar<> nicUnusedWhenEvicted; + /** the total latency of unused NIC blks in the cache */ + Stats::Scalar<> nicUnusedTotLatency; + /** the total number of unused NIC blocks evicted */ + Stats::Scalar<> nicUnusedTotEvicted; + /** the average number of cycles an unused NIC blk is in the cache */ + Stats::Formula nicUnusedAvgLatency; + /** the Distribution of unused NIC blk eviction times */ + Stats::Distribution<> unusedEvictDist; + + /** The total latency of NIC blocks to 1st usage time by CPU */ + Stats::Scalar<> nicUseByCPUCycleTotal; + /** The total number of NIC blocks used */ + Stats::Scalar<> nicBlksUsedByCPU; + /** the average number of cycles before a NIC blk that is used gets used by CPU */ + Stats::Formula nicAvgUsageByCPULatency; + /** the Distribution of cycles time before a NIC blk is used by CPU*/ + Stats::Distribution<> useByCPUCycleDist; + + /** the number of CPU blks that were used before evicted */ + Stats::Scalar<> cpuUsedBlks; + /** the number of CPU blks that were unused before evicted */ + Stats::Scalar<> cpuUnusedBlks; + + /** the avg number of cycles before a NIC blk is evicted */ + Stats::Formula nicAvgLatency; + + typedef m5::hash_map<Addr, int, m5::hash<Addr> > hash_t; + typedef hash_t::const_iterator memIter; + hash_t memHash; + + + private: + SplitLRU *lru; + SplitLRU *lru_net; + SplitLIFO *lifo_net; + + public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, + bool _lifo, bool _two_queue, int _hit_latency); + + /** + * Destructor + */ + virtual ~Split(); + + /** + * Register the stats for this object + * @param name The name to prepend to the stats name. + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of Split it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The memory request whose block to find + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks); + + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk The block to find the partition it's in + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const; + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + panic("should never call this!\n"); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr) (blkMask)); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) const; + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * Split does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh new file mode 100644 index 000000000..f38516180 --- /dev/null +++ b/src/mem/cache/tags/split_blk.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of partitioned tag store cache block class. + */ + +#ifndef __SPLIT_BLK_HH__ +#define __SPLIT_BLK_HH__ + +#include "mem/cache/cache_blk.hh" // base class + +/** + * Split cache block. + */ +class SplitBlk : public CacheBlk { + public: + /** Has this block been touched? Used to aid calculation of warmup time. */ + bool isTouched; + /** Has this block been used after being brought in? (for LIFO partition) */ + bool isUsed; + /** is this blk a NIC block? (i.e. requested by the NIC) */ + bool isNIC; + /** timestamp of the arrival of this block into the cache */ + Tick ts; + /** the previous block in the LIFO partition (brought in before than me) */ + SplitBlk *prev; + /** the next block in the LIFO partition (brought in later than me) */ + SplitBlk *next; + /** which partition this block is in */ + int part; + + SplitBlk() + : isTouched(false), isUsed(false), isNIC(false), ts(0), prev(NULL), next(NULL), + part(0) + {} +}; + +#endif + diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc new file mode 100644 index 000000000..f6493fdd2 --- /dev/null +++ b/src/mem/cache/tags/split_lifo.cc @@ -0,0 +1,407 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of LIFO tag store usable in a partitioned cache. + */ + +#include <string> + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/split_lifo.hh" +#include "sim/root.hh" +#include "base/trace.hh" + +using namespace std; + +SplitBlk* +LIFOSet::findBlk(int asid, Addr tag) const +{ + for (SplitBlk *blk = firstIn; blk != NULL; blk = blk->next) { + if (blk->tag == tag && blk->isValid()) { + return blk; + } + } + return NULL; +} + +void +LIFOSet::moveToLastIn(SplitBlk *blk) +{ + if (blk == lastIn) + return; + + if (blk == firstIn) { + blk->next->prev = NULL; + } else { + blk->prev->next = blk->next; + blk->next->prev = blk->prev; + } + blk->next = NULL; + blk->prev = lastIn; + lastIn->next = blk; + + lastIn = blk; +} + +void +LIFOSet::moveToFirstIn(SplitBlk *blk) +{ + if (blk == firstIn) + return; + + if (blk == lastIn) { + blk->prev->next = NULL; + } else { + blk->next->prev = blk->prev; + blk->prev->next = blk->next; + } + + blk->prev = NULL; + blk->next = firstIn; + firstIn->prev = blk; + + firstIn = blk; +} + +// create and initialize a LIFO cache structure +SplitLIFO::SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool two_Queue, int _part) : + blkSize(_blkSize), size(_size), numBlks(_size/_blkSize), numSets((_size/_ways)/_blkSize), ways(_ways), + hitLatency(_hit_latency), twoQueue(two_Queue), part(_part) +{ + if (!isPowerOf2(blkSize)) + fatal("cache block size (in bytes) must be a power of 2"); + if (!(hitLatency > 0)) + fatal("access latency in cycles must be at least on cycle"); + if (_ways == 0) + fatal("if instantiating a splitLIFO, needs non-zero size!"); + + + SplitBlk *blk; + int i, j, blkIndex; + + setShift = floorLog2(blkSize); + blkMask = blkSize - 1; + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = size/blkSize; + + // allocate data blocks + blks = new SplitBlk[numBlks]; + sets = new LIFOSet[numSets]; + dataBlks = new uint8_t[size]; + +/* + // these start off point to same blk + top = &(blks[0]); + head = top; +*/ + + blkIndex = 0; + for (i=0; i < numSets; ++i) { + sets[i].ways = ways; + sets[i].lastIn = &blks[blkIndex]; + sets[i].firstIn = &blks[blkIndex + ways - 1]; + + /* 3 cases: if there is 1 way, if there are 2 ways, or if there are 3+. + in the case of 1 way, last in and first out point to the same blocks, + and the next and prev pointers need to be assigned specially. and so on + */ + /* deal with the first way */ + blk = &blks[blkIndex]; + blk->prev = &blks[blkIndex + 1]; + blk->next = NULL; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + + /* if there are "middle" ways, do them here */ + if (ways > 2) { + for (j=1; j < ways-1; ++j) { + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->prev = &blks[blkIndex+1]; + blk->next = &blks[blkIndex-1]; + blk->data = &(dataBlks[blkSize*blkIndex]); + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + } + } + + /* do the final way here, depending on whether the final way is the only + way or not + */ + if (ways > 1) { + blk = &blks[blkIndex]; + blk->prev = NULL; + blk->next = &blks[blkIndex - 1]; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + } else { + blk->prev = NULL; + } + } + assert(blkIndex == numBlks); +} + +SplitLIFO::~SplitLIFO() +{ + delete [] blks; + delete [] sets; + delete [] dataBlks; +} + +void +SplitLIFO::regStats(const std::string &name) +{ + BaseTags::regStats(name); + + hits + .name(name + ".hits") + .desc("number of hits on this partition") + .precision(0) + ; + + misses + .name(name + ".misses") + .desc("number of misses in this partition") + .precision(0) + ; + + invalidations + .name(name + ".invalidations") + .desc("number of invalidations in this partition") + .precision(0) + ; +} + +// probe cache for presence of given block. +bool +SplitLIFO::probe(int asid, Addr addr) const +{ + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + SplitBlk* blk = sets[myset].findBlk(asid, tag); + return (blk != NULL); +} + +SplitBlk* +SplitLIFO::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + lat = hitLatency; + + if (blk) { + DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", + addr, set, tag); + hits++; + + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency) + lat = blk->whenReady - curTick; + blk->refCount +=1; + + if (twoQueue) { + blk->isUsed = true; + sets[set].moveToFirstIn(blk); + } else { + sets[set].moveToLastIn(blk); + } + } + + return blk; +} + +SplitBlk* +SplitLIFO::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + if (blk) { + DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", + addr, set, tag); + hits++; + + if (twoQueue) { + blk->isUsed = true; + sets[set].moveToFirstIn(blk); + } else { + sets[set].moveToLastIn(blk); + } + } + lat = hitLatency; + + return blk; +} + +SplitBlk* +SplitLIFO::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + return blk; +} + +SplitBlk* +SplitLIFO::findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->getAddr()); + + SplitBlk *firstIn = sets[set].firstIn; + SplitBlk *lastIn = sets[set].lastIn; + + SplitBlk *blk; + if (twoQueue && firstIn->isUsed) { + blk = firstIn; + blk->isUsed = false; + sets[set].moveToLastIn(blk); + } else { + int withValue = sets[set].withValue; + if (withValue == ways) { + blk = lastIn; + } else { + blk = &(sets[set].firstIn[ways - ++withValue]); + } + } + + DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", + pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status); + if (blk->isValid()) { + replacements[0]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + misses++; + + return blk; +} + +void +SplitLIFO::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + invalidations++; + } +} + +void +SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) +{ +//Copy Unsuported for now +#if 0 + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + SplitBlk *source_blk = findBlock(source, asid); + assert(source_blk); + SplitBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. + dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +#endif +} + +void +SplitLIFO::cleanupRefs() +{ + for (int i = 0; i < numBlks; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh new file mode 100644 index 000000000..dfcaa0b67 --- /dev/null +++ b/src/mem/cache/tags/split_lifo.hh @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a LIFO tag store usable in a partitioned cache. + */ + +#ifndef __SPLIT_LIFO_HH__ +#define __SPLIT_LIFO_HH__ + +#include <list> + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include "base/hashmap.hh" +#include <assert.h> +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * A LIFO set of cache blks + */ +class LIFOSet { + public: + /** the number of blocks in this set */ + int ways; + + /** Cache blocks in this set, maintained in LIFO order where + 0 = Last in (head) */ + SplitBlk *lastIn; + SplitBlk *firstIn; + + /** has the initial "filling" of this set finished? i.e., have you had + * 'ways' number of compulsory misses in this set yet? if withValue == ways, + * then yes. withValue is meant to be the number of blocks in the set that have + * gone through their first compulsory miss. + */ + int withValue; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag the Tag you are looking for + * @return Pointer to the block, if found, NULL otherwise + */ + SplitBlk* findBlk(int asid, Addr tag) const; + + void moveToLastIn(SplitBlk *blk); + void moveToFirstIn(SplitBlk *blk); + + LIFOSet() + : ways(-1), lastIn(NULL), firstIn(NULL), withValue(0) + {} +}; + +/** + * A LIFO cache tag store. + */ +class SplitLIFO : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list<SplitBlk*> BlkList; + protected: + /** The number of bytes in a block. */ + const int blkSize; + /** the size of the cache in bytes */ + const int size; + /** the number of blocks in the cache */ + const int numBlks; + /** the number of sets in the cache */ + const int numSets; + /** the number of ways in the cache */ + const int ways; + /** The hit latency. */ + const int hitLatency; + /** whether this is a "2 queue" replacement @sa moveToLastIn @sa moveToFirstIn */ + const bool twoQueue; + /** indicator for which partition this is */ + const int part; + + /** The cache blocks. */ + SplitBlk *blks; + /** The Cache sets */ + LIFOSet *sets; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + + + /** the number of hit in this partition */ + Stats::Scalar<> hits; + /** the number of blocks brought into this partition (i.e. misses) */ + Stats::Scalar<> misses; + /** the number of invalidations in this partition */ + Stats::Scalar<> invalidations; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool twoQueue, int _part); + + /** + * Destructor + */ + virtual ~SplitLIFO(); + + /** + * Register the statistics for this object + * @param name The name to precede the stat + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LIFO it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The req whose block to find + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LIFO does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc new file mode 100644 index 000000000..7fc742001 --- /dev/null +++ b/src/mem/cache/tags/split_lru.cc @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of LRU tag store for a partitioned cache. + */ + +#include <string> + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/split_lru.hh" +#include "sim/root.hh" + +using namespace std; + +SplitBlk* +SplitCacheSet::findBlk(int asid, Addr tag) const +{ + for (int i = 0; i < assoc; ++i) { + if (blks[i]->tag == tag && blks[i]->isValid()) { + return blks[i]; + } + } + return 0; +} + + +void +SplitCacheSet::moveToHead(SplitBlk *blk) +{ + // nothing to do if blk is already head + if (blks[0] == blk) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + SplitBlk *next = blk; + + do { + assert(i < assoc); + // swap blks[i] and next + SplitBlk *tmp = blks[i]; + blks[i] = next; + next = tmp; + ++i; + } while (next != blk); +} + + +// create and initialize a LRU/MRU cache structure +SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part) : + numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency), part(_part) +{ + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (numSets <= 0 || !isPowerOf2(numSets)) { + fatal("# of sets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + + SplitBlk *blk; + int i, j, blkIndex; + + blkMask = blkSize - 1; + setShift = floorLog2(blkSize); + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = numSets * assoc; + + sets = new SplitCacheSet[numSets]; + blks = new SplitBlk[numSets * assoc]; + // allocate data storage in one big chunk + dataBlks = new uint8_t[numSets*assoc*blkSize]; + + blkIndex = 0; // index into blks array + for (i = 0; i < numSets; ++i) { + sets[i].assoc = assoc; + + sets[i].blks = new SplitBlk*[assoc]; + + // link in the data blocks + for (j = 0; j < assoc; ++j) { + // locate next cache block + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + ++blkIndex; + + // invalidate new cache block + blk->status = 0; + + //EGH Fix Me : do we need to initialize blk? + + // Setting the tag to j is just to prevent long chains in the hash + // table; won't matter because the block is invalid + blk->tag = j; + blk->whenReady = 0; + blk->asid = -1; + blk->isTouched = false; + blk->size = blkSize; + sets[i].blks[j]=blk; + blk->set = i; + blk->part = part; + } + } +} + +SplitLRU::~SplitLRU() +{ + delete [] dataBlks; + delete [] blks; + delete [] sets; +} + +void +SplitLRU::regStats(const std::string &name) +{ + BaseTags::regStats(name); + + hits + .name(name + ".hits") + .desc("number of hits on this partition") + .precision(0) + ; + + misses + .name(name + ".misses") + .desc("number of misses in this partition") + .precision(0) + ; +} + +// probe cache for presence of given block. +bool +SplitLRU::probe(int asid, Addr addr) const +{ + // return(findBlock(Read, addr, asid) != 0); + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + SplitBlk *blk = sets[myset].findBlk(asid, tag); + + return (blk != NULL); // true if in cache +} + +SplitBlk* +SplitLRU::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + hits++; + } + + return blk; +} + +SplitBlk* +SplitLRU::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + hits++; + } + + return blk; +} + +SplitBlk* +SplitLRU::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + return blk; +} + +SplitBlk* +SplitLRU::findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->getAddr()); + // grab a replacement candidate + SplitBlk *blk = sets[set].blks[assoc-1]; + sets[set].moveToHead(blk); + if (blk->isValid()) { + replacements[0]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else if (!blk->isTouched) { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + misses++; + + return blk; +} + +void +SplitLRU::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +void +SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) +{ +//Copy not supported for now +#if 0 + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + SplitBlk *source_blk = findBlock(source, asid); + assert(source_blk); + SplitBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. + dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +#endif +} + +void +SplitLRU::cleanupRefs() +{ + for (int i = 0; i < numSets*assoc; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh new file mode 100644 index 000000000..03886b1d8 --- /dev/null +++ b/src/mem/cache/tags/split_lru.hh @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a LRU tag store for a partitioned cache. + */ + +#ifndef __SPLIT_LRU_HH__ +#define __SPLIT_LRU_HH__ + +#include <list> + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include <assert.h> +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * An associative set of cache blocks. + */ + +class SplitCacheSet +{ + public: + /** The associativity of this set. */ + int assoc; + + /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ + SplitBlk **blks; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag The Tag to find. + * @return Pointer to the block if found. + */ + SplitBlk* findBlk(int asid, Addr tag) const; + + /** + * Move the given block to the head of the list. + * @param blk The block to move. + */ + void moveToHead(SplitBlk *blk); +}; + +/** + * A LRU cache tag store. + */ +class SplitLRU : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list<SplitBlk*> BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** The associativity of the cache. */ + const int assoc; + /** The hit latency. */ + const int hitLatency; + /** indicator for which partition this is */ + const int part; + + /** The cache sets. */ + SplitCacheSet *sets; + + /** The cache blocks. */ + SplitBlk *blks; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + + /** number of hits in this partition */ + Stats::Scalar<> hits; + /** number of blocks brought into this partition (i.e. misses) */ + Stats::Scalar<> misses; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part); + + /** + * Destructor + */ + virtual ~SplitLRU(); + + /** + * Register the statistics for this object + * @param name The name to precede the stat + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LRU it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The req whose block to find. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored. + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LRU does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/config/cache.hh b/src/mem/config/cache.hh new file mode 100644 index 000000000..24da04021 --- /dev/null +++ b/src/mem/config/cache.hh @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + */ + +/** + * @file + * Central location to configure which cache types we want to build + * into the simulator. In the future, this should probably be + * autogenerated by some sort of configuration script. + */ +#define USE_CACHE_LRU 1 +#define USE_CACHE_FALRU 1 +// #define USE_CACHE_SPLIT 1 +// #define USE_CACHE_SPLIT_LIFO 1 +#define USE_CACHE_IIC 1 + diff --git a/src/mem/mem_object.hh b/src/mem/mem_object.hh index ac547619d..c81ea03d8 100644 --- a/src/mem/mem_object.hh +++ b/src/mem/mem_object.hh @@ -50,7 +50,7 @@ class MemObject : public SimObject public: /** Additional function to return the Port of a memory object. */ - virtual Port *getPort(const std::string &if_name) = 0; + virtual Port *getPort(const std::string &if_name, int idx = -1) = 0; }; #endif //__MEM_MEM_OBJECT_HH__ diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 56dd2bdfa..91298df8c 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -57,6 +57,19 @@ Packet::cmdString() const } } +const std::string & +Packet::cmdIdxToString(Packet::Command idx) +{ + switch (idx) { + case ReadReq: return ReadReqString; + case WriteReq: return WriteReqString; + case WriteReqNoAck: return WriteReqNoAckString; + case ReadResp: return ReadRespString; + case WriteResp: return WriteRespString; + default: return OtherCmdString; + } +} + /** delete the data pointed to in the data pointer. Ok to call to matter how * data was allocted. */ void diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 403039d96..534db0077 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -41,10 +41,24 @@ #include "mem/request.hh" #include "arch/isa_traits.hh" #include "sim/root.hh" +#include <list> struct Packet; typedef Packet* PacketPtr; typedef uint8_t* PacketDataPtr; +typedef std::list<PacketPtr> PacketList; + +//Coherence Flags +#define NACKED_LINE 1 << 0 +#define SATISFIED 1 << 1 +#define SHARED_LINE 1 << 2 +#define CACHE_LINE_FILL 1 << 3 +#define COMPRESSED 1 << 4 +#define NO_ALLOCATE 1 << 5 + +//For statistics we need max number of commands, hard code it at +//20 for now. @todo fix later +#define NUM_MEM_CMDS 1 << 9 /** * A Packet is used to encapsulate a transfer between two objects in @@ -55,6 +69,10 @@ typedef uint8_t* PacketDataPtr; */ class Packet { + public: + /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */ + uint64_t flags; + private: /** A pointer to the data being transfered. It can be differnt * sizes at each level of the heirarchy so it belongs in the @@ -100,8 +118,12 @@ class Packet /** Is the 'src' field valid? */ bool srcValid; + public: + /** Used to calculate latencies for each packet.*/ + Tick time; + /** The special destination address indicating that the packet * should be routed based on its address. */ static const short Broadcast = -1; @@ -149,30 +171,58 @@ class Packet IsRequest = 1 << 4, IsResponse = 1 << 5, NeedsResponse = 1 << 6, + IsSWPrefetch = 1 << 7, + IsHWPrefetch = 1 << 8 }; public: /** List of all commands associated with a packet. */ enum Command { + InvalidCmd = 0, ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, - ReadResp = IsRead | IsResponse, - WriteResp = IsWrite | IsResponse + ReadResp = IsRead | IsResponse | NeedsResponse, + WriteResp = IsWrite | IsResponse | NeedsResponse, + Writeback = IsWrite | IsRequest, + SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, + HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, + SoftPFResp = IsRead | IsResponse | IsSWPrefetch | NeedsResponse, + HardPFResp = IsRead | IsResponse | IsHWPrefetch | NeedsResponse, + InvalidateReq = IsInvalidate | IsRequest, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, + UpgradeReq = IsInvalidate | IsRequest | NeedsResponse, + UpgradeResp = IsInvalidate | IsResponse | NeedsResponse, + ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, + ReadExResp = IsRead | IsInvalidate | IsResponse | NeedsResponse }; /** Return the string name of the cmd field (for debugging and * tracing). */ const std::string &cmdString() const; + /** Reutrn the string to a cmd given by idx. */ + const std::string &cmdIdxToString(Command idx); + + /** Return the index of this command. */ + inline int cmdToIndex() const { return (int) cmd; } + /** The command field of the packet. */ Command cmd; bool isRead() { return (cmd & IsRead) != 0; } + bool isWrite() { return (cmd & IsWrite) != 0; } bool isRequest() { return (cmd & IsRequest) != 0; } bool isResponse() { return (cmd & IsResponse) != 0; } bool needsResponse() { return (cmd & NeedsResponse) != 0; } + bool isInvalidate() { return (cmd * IsInvalidate) != 0; } + + bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } + bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } + bool isCompressed() { return (flags & COMPRESSED) != 0; } + + bool nic_pkt() { assert("Unimplemented\n" && 0); } /** Possible results of a packet's request. */ enum Result @@ -197,6 +247,10 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } + Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); } + + void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } + void cmdOverride(Command newCmd) { cmd = newCmd; } /** Constructor. Note that a Request object must be constructed * first, but the Requests's physical address and size fields @@ -210,6 +264,21 @@ class Packet req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { + flags = 0; + } + + /** Alternate constructor if you are trying to create a packet with + * a request that is for a whole block, not the address from the req. + * this allows for overriding the size/addr of the req.*/ + Packet(Request *_req, Command _cmd, short _dest, int _blkSize) + : data(NULL), staticData(false), dynamicData(false), arrayData(false), + addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), + dest(_dest), + addrSizeValid(_req->validPaddr), srcValid(false), + req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), + result(Unknown) + { + flags = 0; } /** Destructor. */ @@ -242,8 +311,9 @@ class Packet * should not be called. */ void makeTimingResponse() { assert(needsResponse()); + assert(isRequest()); int icmd = (int)cmd; - icmd &= ~(IsRequest | NeedsResponse); + icmd &= ~(IsRequest); icmd |= IsResponse; cmd = (Command)icmd; dest = src; diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index b5cecc7da..a34a0393a 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -54,6 +54,9 @@ PageTable::PageTable(System *_system, Addr _pageSize) system(_system) { assert(isPowerOf2(pageSize)); + pTableCache[0].vaddr = 0; + pTableCache[1].vaddr = 0; + pTableCache[2].vaddr = 0; } PageTable::~PageTable() @@ -95,7 +98,7 @@ PageTable::allocate(Addr vaddr, int size) assert(pageOffset(vaddr) == 0); for (; size > 0; size -= pageSize, vaddr += pageSize) { - std::map<Addr,Addr>::iterator iter = pTable.find(vaddr); + m5::hash_map<Addr,Addr>::iterator iter = pTable.find(vaddr); if (iter != pTable.end()) { // already mapped @@ -103,6 +106,12 @@ PageTable::allocate(Addr vaddr, int size) } pTable[vaddr] = system->new_page(); + pTableCache[2].paddr = pTableCache[1].paddr; + pTableCache[2].vaddr = pTableCache[1].vaddr; + pTableCache[1].paddr = pTableCache[0].paddr; + pTableCache[1].vaddr = pTableCache[0].vaddr; + pTableCache[0].paddr = pTable[vaddr]; + pTableCache[0].vaddr = vaddr; } } @@ -112,7 +121,22 @@ bool PageTable::translate(Addr vaddr, Addr &paddr) { Addr page_addr = pageAlign(vaddr); - std::map<Addr,Addr>::iterator iter = pTable.find(page_addr); + paddr = 0; + + if (pTableCache[0].vaddr == vaddr) { + paddr = pTableCache[0].paddr; + return true; + } + if (pTableCache[1].vaddr == vaddr) { + paddr = pTableCache[1].paddr; + return true; + } + if (pTableCache[2].vaddr == vaddr) { + paddr = pTableCache[2].paddr; + return true; + } + + m5::hash_map<Addr,Addr>::iterator iter = pTable.find(page_addr); if (iter == pTable.end()) { return false; @@ -130,7 +154,7 @@ PageTable::translate(RequestPtr &req) assert(pageAlign(req->getVaddr() + req->getSize() - 1) == pageAlign(req->getVaddr())); if (!translate(req->getVaddr(), paddr)) { - return genMachineCheckFault(); + return genPageTableFault(req->getVaddr()); } req->setPaddr(paddr); return page_check(req->getPaddr(), req->getSize()); diff --git a/src/mem/page_table.hh b/src/mem/page_table.hh index f7212d423..494c0ce9a 100644 --- a/src/mem/page_table.hh +++ b/src/mem/page_table.hh @@ -37,9 +37,9 @@ #define __PAGE_TABLE__ #include <string> -#include <map> #include "arch/isa_traits.hh" +#include "base/hashmap.hh" #include "base/trace.hh" #include "mem/request.hh" #include "mem/packet.hh" @@ -53,7 +53,14 @@ class System; class PageTable { protected: - std::map<Addr,Addr> pTable; + m5::hash_map<Addr,Addr> pTable; + + struct cacheElement { + Addr paddr; + Addr vaddr; + } ; + + struct cacheElement pTableCache[3]; const Addr pageSize; const Addr offsetMask; diff --git a/src/mem/physical.cc b/src/mem/physical.cc index fb31fb4a3..2d66602ab 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -173,9 +173,9 @@ PhysicalMemory::doFunctionalAccess(Packet *pkt) } Port * -PhysicalMemory::getPort(const std::string &if_name) +PhysicalMemory::getPort(const std::string &if_name, int idx) { - if (if_name == "") { + if (if_name == "port" && idx == -1) { if (port != NULL) panic("PhysicalMemory::getPort: additional port requested to memory!"); port = new MemoryPort(name() + "-port", this); diff --git a/src/mem/physical.hh b/src/mem/physical.hh index 88ea543da..50fa75ed3 100644 --- a/src/mem/physical.hh +++ b/src/mem/physical.hh @@ -108,7 +108,7 @@ class PhysicalMemory : public MemObject public: int deviceBlockSize(); void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); void virtual init(); // fast back-door memory access for vtophys(), remote gdb, etc. diff --git a/src/mem/port.hh b/src/mem/port.hh index 2edad095e..17b1f4a00 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -74,7 +74,7 @@ class Port private: /** Descriptive name (for DPRINTF output) */ - const std::string portName; + mutable std::string portName; /** A pointer to the peer port. Ports always come in pairs, that way they can use a standardized interface to communicate between different @@ -83,6 +83,10 @@ class Port public: + Port() + : peer(NULL) + { } + /** * Constructor. * @@ -105,6 +109,9 @@ class Port RangeChange }; + void setName(const std::string &name) + { portName = name; } + /** Function to set the pointer for the peer port. @todo should be called by the configuration stuff (python). */ diff --git a/src/mem/request.hh b/src/mem/request.hh index af1d6d8a8..a1524f807 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -44,6 +44,7 @@ class Request; typedef Request* RequestPtr; + /** The request is a Load locked/store conditional. */ const unsigned LOCKED = 0x001; /** The virtual address is also the physical address. */ @@ -62,6 +63,8 @@ const unsigned PF_EXCLUSIVE = 0x100; const unsigned EVICT_NEXT = 0x200; /** The request should ignore unaligned access faults */ const unsigned NO_ALIGN_FAULT = 0x400; +/** The request was an instruction read. */ +const unsigned INST_READ = 0x800; class Request { @@ -224,6 +227,11 @@ class Request /** Accessor function for pc.*/ Addr getPC() { assert(validPC); return pc; } + /** Accessor Function to Check Cacheability. */ + bool isUncacheable() { return getFlags() & UNCACHEABLE; } + + bool isInstRead() { return getFlags() & INST_READ; } + friend class Packet; }; diff --git a/src/python/SConscript b/src/python/SConscript index 7b0f591eb..c9e713199 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -75,24 +75,35 @@ def addPkg(pkgdir): # build_env flags. def MakeDefinesPyFile(target, source, env): f = file(str(target[0]), 'w') - print >>f, "m5_build_env = ", - print >>f, source[0] + print >>f, "m5_build_env = ", source[0] f.close() optionDict = dict([(opt, env[opt]) for opt in env.ExportOptions]) env.Command('m5/defines.py', Value(optionDict), MakeDefinesPyFile) +def MakeInfoPyFile(target, source, env): + f = file(str(target[0]), 'w') + for src in source: + data = ''.join(file(src.srcnode().abspath, 'r').xreadlines()) + print >>f, "%s = %s" % (src, repr(data)) + f.close() + +env.Command('m5/info.py', + [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ], + MakeInfoPyFile) + # Now specify the packages & files for the zip archive. addPkg('m5') pyzip_files.append('m5/defines.py') +pyzip_files.append('m5/info.py') pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py')) -env.Command(['swig/main_wrap.cc', 'm5/main.py'], - 'swig/main.i', +env.Command(['swig/cc_main_wrap.cc', 'm5/cc_main.py'], + 'swig/cc_main.i', '$SWIG $SWIGFLAGS -outdir ${TARGETS[1].dir} ' '-o ${TARGETS[0]} $SOURCES') -pyzip_dep_files.append('m5/main.py') +pyzip_dep_files.append('m5/cc_main.py') # Action function to build the zip archive. Uses the PyZipFile module # included in the standard Python library. diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 60a61d66e..3d0e3defa 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -27,14 +27,14 @@ # Authors: Nathan Binkert # Steve Reinhardt -import sys, os, time, atexit, optparse +import atexit, os, sys # import the SWIG-wrapped main C++ functions -import main +import cc_main # import a few SWIG-wrapped items (those that are likely to be used # directly by user scripts) completely into this module for # convenience -from main import simulate, SimLoopExitEvent +from cc_main import simulate, SimLoopExitEvent # import the m5 compile options import defines @@ -57,20 +57,6 @@ def AddToPath(path): # so place the new dir right after that. sys.path.insert(1, path) - -# Callback to set trace flags. Not necessarily the best way to do -# things in the long run (particularly if we change how these global -# options are handled). -def setTraceFlags(option, opt_str, value, parser): - objects.Trace.flags = value - -# Standard optparse options. Need to be explicitly included by the -# user script when it calls optparse.OptionParser(). -standardOptions = [ - optparse.make_option("--traceflags", type="string", action="callback", - callback=setTraceFlags) - ] - # make a SmartDict out of the build options for our local use import smartdict build_env = smartdict.SmartDict() @@ -80,16 +66,26 @@ build_env.update(defines.m5_build_env) env = smartdict.SmartDict() env.update(os.environ) +# Function to provide to C++ so it can look up instances based on paths +def resolveSimObject(name): + obj = config.instanceDict[name] + return obj.getCCObject() + +from main import options, arguments, main + # The final hook to generate .ini files. Called from the user script # once the config is built. def instantiate(root): config.ticks_per_sec = float(root.clock.frequency) # ugly temporary hack to get output to config.ini - sys.stdout = file('config.ini', 'w') + sys.stdout = file(os.path.join(options.outdir, 'config.ini'), 'w') root.print_ini() sys.stdout.close() # close config.ini sys.stdout = sys.__stdout__ # restore to original - main.initialize() # load config.ini into C++ and process it + cc_main.loadIniFile(resolveSimObject) # load config.ini into C++ + root.createCCObject() + root.connectPorts() + cc_main.finalInit() noDot = True # temporary until we fix dot if not noDot: dot = pydot.Dot() @@ -103,12 +99,105 @@ def instantiate(root): # Export curTick to user script. def curTick(): - return main.cvar.curTick + return cc_main.cvar.curTick # register our C++ exit callback function with Python -atexit.register(main.doExitCleanup) +atexit.register(cc_main.doExitCleanup) # This import allows user scripts to reference 'm5.objects.Foo' after # just doing an 'import m5' (without an 'import m5.objects'). May not # matter since most scripts will probably 'from m5.objects import *'. import objects + +# This loops until all objects have been fully drained. +def doDrain(root): + all_drained = drain(root) + while (not all_drained): + all_drained = drain(root) + +# Tries to drain all objects. Draining might not be completed unless +# all objects return that they are drained on the first call. This is +# because as objects drain they may cause other objects to no longer +# be drained. +def drain(root): + all_drained = False + drain_event = cc_main.createCountedDrain() + unready_objects = root.startDrain(drain_event, True) + # If we've got some objects that can't drain immediately, then simulate + if unready_objects > 0: + drain_event.setCount(unready_objects) + simulate() + else: + all_drained = True + cc_main.cleanupCountedDrain(drain_event) + return all_drained + +def resume(root): + root.resume() + +def checkpoint(root, dir): + if not isinstance(root, objects.Root): + raise TypeError, "Object is not a root object. Checkpoint must be called on a root object." + doDrain(root) + print "Writing checkpoint" + cc_main.serializeAll(dir) + resume(root) + +def restoreCheckpoint(root, dir): + print "Restoring from checkpoint" + cc_main.unserializeAll(dir) + resume(root) + +def changeToAtomic(system): + if not isinstance(system, objects.Root) and not isinstance(system, System): + raise TypeError, "Object is not a root or system object. Checkpoint must be " + "called on a root object." + doDrain(system) + print "Changing memory mode to atomic" + system.changeTiming(cc_main.SimObject.Atomic) + resume(system) + +def changeToTiming(system): + if not isinstance(system, objects.Root) and not isinstance(system, System): + raise TypeError, "Object is not a root or system object. Checkpoint must be " + "called on a root object." + doDrain(system) + print "Changing memory mode to timing" + system.changeTiming(cc_main.SimObject.Timing) + resume(system) + +def switchCpus(cpuList): + if not isinstance(cpuList, list): + raise RuntimeError, "Must pass a list to this function" + for i in cpuList: + if not isinstance(i, tuple): + raise RuntimeError, "List must have tuples of (oldCPU,newCPU)" + + [old_cpus, new_cpus] = zip(*cpuList) + + for cpu in old_cpus: + if not isinstance(cpu, objects.BaseCPU): + raise TypeError, "%s is not of type BaseCPU", cpu + for cpu in new_cpus: + if not isinstance(cpu, objects.BaseCPU): + raise TypeError, "%s is not of type BaseCPU", cpu + + # Drain all of the individual CPUs + drain_event = cc_main.createCountedDrain() + unready_cpus = 0 + for old_cpu in old_cpus: + unready_cpus += old_cpu.startDrain(drain_event, False) + # If we've got some objects that can't drain immediately, then simulate + if unready_cpus > 0: + drain_event.setCount(unready_cpus) + simulate() + cc_main.cleanupCountedDrain(drain_event) + # Now all of the CPUs are ready to be switched out + for old_cpu in old_cpus: + old_cpu._ccObject.switchOut() + index = 0 + print "Switching CPUs" + for new_cpu in new_cpus: + new_cpu.takeOverFrom(old_cpus[index]) + new_cpu._ccObject.resume() + index += 1 diff --git a/src/python/m5/attrdict.py b/src/python/m5/attrdict.py new file mode 100644 index 000000000..4ee7f1b8c --- /dev/null +++ b/src/python/m5/attrdict.py @@ -0,0 +1,61 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nathan Binkert + +__all__ = [ 'attrdict' ] + +class attrdict(dict): + def __getattr__(self, attr): + if attr in self: + return self.__getitem__(attr) + return super(attrdict, self).__getattribute__(attr) + + def __setattr__(self, attr, value): + if attr in dir(self): + return super(attrdict, self).__setattr__(attr, value) + return self.__setitem__(attr, value) + + def __delattr__(self, attr): + if attr in self: + return self.__delitem__(attr) + return super(attrdict, self).__delattr__(attr, value) + +if __name__ == '__main__': + x = attrdict() + x.y = 1 + x['z'] = 2 + print x['y'], x.y + print x['z'], x.z + print dir(x) + print x + + print + + del x['y'] + del x.z + print dir(x) + print(x) diff --git a/src/python/m5/config.py b/src/python/m5/config.py index 97e13c900..8eed28dcc 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2004-2005 The Regents of The University of Michigan +# Copyright (c) 2004-2006 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,10 +27,10 @@ # Authors: Steve Reinhardt # Nathan Binkert -import os, re, sys, types, inspect +import os, re, sys, types, inspect, copy import m5 -from m5 import panic +from m5 import panic, cc_main from convert import * from multidict import multidict @@ -84,65 +84,22 @@ class Singleton(type): # # Once a set of Python objects have been instantiated in a hierarchy, # calling 'instantiate(obj)' (where obj is the root of the hierarchy) -# will generate a .ini file. See simple-4cpu.py for an example -# (corresponding to m5-test/simple-4cpu.ini). +# will generate a .ini file. # ##################################################################### -##################################################################### -# -# ConfigNode/SimObject classes -# -# The Python class hierarchy rooted by ConfigNode (which is the base -# class of SimObject, which in turn is the base class of all other M5 -# SimObject classes) has special attribute behavior. In general, an -# object in this hierarchy has three categories of attribute-like -# things: -# -# 1. Regular Python methods and variables. These must start with an -# underscore to be treated normally. -# -# 2. SimObject parameters. These values are stored as normal Python -# attributes, but all assignments to these attributes are checked -# against the pre-defined set of parameters stored in the class's -# _params dictionary. Assignments to attributes that do not -# correspond to predefined parameters, or that are not of the correct -# type, incur runtime errors. +# dict to look up SimObjects based on path +instanceDict = {} + +############################# # -# 3. Hierarchy children. The child nodes of a ConfigNode are stored -# in the node's _children dictionary, but can be accessed using the -# Python attribute dot-notation (just as they are printed out by the -# simulator). Children cannot be created using attribute assigment; -# they must be added by specifying the parent node in the child's -# constructor or using the '+=' operator. - -# The SimObject parameters are the most complex, for a few reasons. -# First, both parameter descriptions and parameter values are -# inherited. Thus parameter description lookup must go up the -# inheritance chain like normal attribute lookup, but this behavior -# must be explicitly coded since the lookup occurs in each class's -# _params attribute. Second, because parameter values can be set -# on SimObject classes (to implement default values), the parameter -# checking behavior must be enforced on class attribute assignments as -# well as instance attribute assignments. Finally, because we allow -# class specialization via inheritance (e.g., see the L1Cache class in -# the simple-4cpu.py example), we must do parameter checking even on -# class instantiation. To provide all these features, we use a -# metaclass to define most of the SimObject parameter behavior for -# this class hierarchy. +# Utility methods # -##################################################################### +############################# def isSimObject(value): return isinstance(value, SimObject) -def isSimObjectClass(value): - try: - return issubclass(value, SimObject) - except TypeError: - # happens if value is not a class at all - return False - def isSimObjectSequence(value): if not isinstance(value, (list, tuple)) or len(value) == 0: return False @@ -153,22 +110,9 @@ def isSimObjectSequence(value): return True -def isSimObjectClassSequence(value): - if not isinstance(value, (list, tuple)) or len(value) == 0: - return False - - for val in value: - if not isNullPointer(val) and not isSimObjectClass(val): - return False - - return True - def isSimObjectOrSequence(value): return isSimObject(value) or isSimObjectSequence(value) -def isSimObjectClassOrSequence(value): - return isSimObjectClass(value) or isSimObjectClassSequence(value) - def isNullPointer(value): return isinstance(value, NullSimObject) @@ -188,40 +132,36 @@ def applyOrMap(objOrSeq, meth, *args, **kwargs): return [applyMethod(o, meth, *args, **kwargs) for o in objOrSeq] -# The metaclass for ConfigNode (and thus for everything that derives -# from ConfigNode, including SimObject). This class controls how new -# classes that derive from ConfigNode are instantiated, and provides -# inherited class behavior (just like a class controls how instances -# of that class are instantiated, and provides inherited instance -# behavior). +# The metaclass for SimObject. This class controls how new classes +# that derive from SimObject are instantiated, and provides inherited +# class behavior (just like a class controls how instances of that +# class are instantiated, and provides inherited instance behavior). class MetaSimObject(type): # Attributes that can be set only at initialization time init_keywords = { 'abstract' : types.BooleanType, 'type' : types.StringType } # Attributes that can be set any time - keywords = { 'check' : types.FunctionType, - 'children' : types.ListType } + keywords = { 'check' : types.FunctionType } # __new__ is called before __init__, and is where the statements # in the body of the class definition get loaded into the class's - # __dict__. We intercept this to filter out parameter assignments + # __dict__. We intercept this to filter out parameter & port assignments # and only allow "private" attributes to be passed to the base # __new__ (starting with underscore). def __new__(mcls, name, bases, dict): - if dict.has_key('_init_dict'): - # must have been called from makeSubclass() rather than - # via Python class declaration; bypass filtering process. - cls_dict = dict - else: - # Copy "private" attributes (including special methods - # such as __new__) to the official dict. Everything else - # goes in _init_dict to be filtered in __init__. - cls_dict = {} - for key,val in dict.items(): - if key.startswith('_'): - cls_dict[key] = val - del dict[key] - cls_dict['_init_dict'] = dict + # Copy "private" attributes, functions, and classes to the + # official dict. Everything else goes in _init_dict to be + # filtered in __init__. + cls_dict = {} + value_dict = {} + for key,val in dict.items(): + if key.startswith('_') or isinstance(val, (types.FunctionType, + types.TypeType)): + cls_dict[key] = val + else: + # must be a param/port setting + value_dict[key] = val + cls_dict['_value_dict'] = value_dict return super(MetaSimObject, mcls).__new__(mcls, name, bases, cls_dict) # subclass initialization @@ -231,10 +171,15 @@ class MetaSimObject(type): super(MetaSimObject, cls).__init__(name, bases, dict) # initialize required attributes - cls._params = multidict() - cls._values = multidict() - cls._instantiated = False # really instantiated or subclassed - cls._anon_subclass_counter = 0 + + # class-only attributes + cls._params = multidict() # param descriptions + cls._ports = multidict() # port descriptions + + # class or instance attributes + cls._values = multidict() # param values + cls._port_map = multidict() # port bindings + cls._instantiated = False # really instantiated, cloned, or subclassed # We don't support multiple inheritance. If you want to, you # must fix multidict to deal with it properly. @@ -243,22 +188,34 @@ class MetaSimObject(type): base = bases[0] - # the only time the following is not true is when we define - # the SimObject class itself + # Set up general inheritance via multidicts. A subclass will + # inherit all its settings from the base class. The only time + # the following is not true is when we define the SimObject + # class itself (in which case the multidicts have no parent). if isinstance(base, MetaSimObject): cls._params.parent = base._params + cls._ports.parent = base._ports cls._values.parent = base._values + cls._port_map.parent = base._port_map + # mark base as having been subclassed base._instantiated = True - # now process the _init_dict items - for key,val in cls._init_dict.items(): - if isinstance(val, (types.FunctionType, types.TypeType)): - type.__setattr__(cls, key, val) - + # Now process the _value_dict items. They could be defining + # new (or overriding existing) parameters or ports, setting + # class keywords (e.g., 'abstract'), or setting parameter + # values or port bindings. The first 3 can only be set when + # the class is defined, so we handle them here. The others + # can be set later too, so just emulate that by calling + # setattr(). + for key,val in cls._value_dict.items(): # param descriptions - elif isinstance(val, ParamDesc): + if isinstance(val, ParamDesc): cls._new_param(key, val) + # port objects + elif isinstance(val, Port): + cls._ports[key] = val + # init-time-only keywords elif cls.init_keywords.has_key(key): cls._set_keyword(key, val, cls.init_keywords[key]) @@ -267,27 +224,6 @@ class MetaSimObject(type): else: setattr(cls, key, val) - # Pull the deep-copy memoization dict out of the class dict if - # it's there... - memo = cls.__dict__.get('_memo', {}) - - # Handle SimObject values - for key,val in cls._values.iteritems(): - # SimObject instances need to be promoted to classes. - # Existing classes should not have any instance values, so - # these can only occur at the lowest level dict (the - # parameters just being set in this class definition). - if isSimObjectOrSequence(val): - assert(val == cls._values.local[key]) - cls._values[key] = applyOrMap(val, 'makeClass', memo) - # SimObject classes need to be subclassed so that - # parameters that get set at this level only affect this - # level and derivatives. - elif isSimObjectClassOrSequence(val): - assert(not cls._values.local.has_key(key)) - cls._values[key] = applyOrMap(val, 'makeSubclass', {}, memo) - - def _set_keyword(cls, keyword, val, kwtype): if not isinstance(val, kwtype): raise TypeError, 'keyword %s has bad type %s (expecting %s)' % \ @@ -313,15 +249,19 @@ class MetaSimObject(type): cls._set_keyword(attr, value, cls.keywords[attr]) return - # must be SimObject param - param = cls._params.get(attr, None) - if param: - # It's ok: set attribute by delegating to 'object' class. - if isSimObjectOrSequence(value) and cls._instantiated: - raise AttributeError, \ - "Cannot set SimObject parameter '%s' after\n" \ + if cls._ports.has_key(attr): + self._ports[attr].connect(self, attr, value) + return + + if isSimObjectOrSequence(value) and cls._instantiated: + raise RuntimeError, \ + "cannot set SimObject parameter '%s' after\n" \ " class %s has been instantiated or subclassed" \ % (attr, cls.__name__) + + # check for param + param = cls._params.get(attr, None) + if param: try: cls._values[attr] = param.convert(value) except Exception, e: @@ -329,12 +269,12 @@ class MetaSimObject(type): (e, cls.__name__, attr, value) e.args = (msg, ) raise - # I would love to get rid of this elif isSimObjectOrSequence(value): - cls._values[attr] = value + # if RHS is a SimObject, it's an implicit child assignment + cls._values[attr] = value else: raise AttributeError, \ - "Class %s has no parameter %s" % (cls.__name__, attr) + "Class %s has no parameter \'%s\'" % (cls.__name__, attr) def __getattr__(cls, attr): if cls._values.has_key(attr): @@ -343,23 +283,7 @@ class MetaSimObject(type): raise AttributeError, \ "object '%s' has no attribute '%s'" % (cls.__name__, attr) - # Create a subclass of this class. Basically a function interface - # to the standard Python class definition mechanism, primarily for - # internal use. 'memo' dict param supports "deep copy" (really - # "deep subclass") operations... within a given operation, - # multiple references to a class should result in a single - # subclass object with multiple references to it (as opposed to - # mutiple unique subclasses). - def makeSubclass(cls, init_dict, memo = {}): - subcls = memo.get(cls) - if not subcls: - name = cls.__name__ + '_' + str(cls._anon_subclass_counter) - cls._anon_subclass_counter += 1 - subcls = MetaSimObject(name, (cls,), - { '_init_dict': init_dict, '_memo': memo }) - return subcls - -# The ConfigNode class is the root of the special hierarchy. Most of +# The SimObject class is the root of the special hierarchy. Most of # the code in this class deals with the configuration hierarchy itself # (parent/child node relationships). class SimObject(object): @@ -367,82 +291,79 @@ class SimObject(object): # get this metaclass. __metaclass__ = MetaSimObject - # __new__ operator allocates new instances of the class. We - # override it here just to support "deep instantiation" operation - # via the _memo dict. When recursively instantiating an object - # hierarchy we want to make sure that each class is instantiated - # only once, and that if there are multiple references to the same - # original class, we end up with the corresponding instantiated - # references all pointing to the same instance. - def __new__(cls, _memo = None, **kwargs): - if _memo is not None and _memo.has_key(cls): - # return previously instantiated object - assert(len(kwargs) == 0) - return _memo[cls] - else: - # Need a new one... if it needs to be memoized, this will - # happen in __init__. We defer the insertion until then - # so __init__ can use the memo dict to tell whether or not - # to perform the initialization. - return super(SimObject, cls).__new__(cls, **kwargs) - - # Initialize new instance previously allocated by __new__. For - # objects with SimObject-valued params, we need to recursively - # instantiate the classes represented by those param values as - # well (in a consistent "deep copy"-style fashion; see comment - # above). - def __init__(self, _memo = None, **kwargs): - if _memo is not None: - # We're inside a "deep instantiation" - assert(isinstance(_memo, dict)) - assert(len(kwargs) == 0) - if _memo.has_key(self.__class__): - # __new__ returned an existing, already initialized - # instance, so there's nothing to do here - assert(_memo[self.__class__] == self) - return - # no pre-existing object, so remember this one here - _memo[self.__class__] = self - else: - # This is a new top-level instantiation... don't memoize - # this objcet, but prepare to memoize any recursively - # instantiated objects. - _memo = {} - - self.__class__._instantiated = True + # Initialize new instance. For objects with SimObject-valued + # children, we need to recursively clone the classes represented + # by those param values as well in a consistent "deep copy"-style + # fashion. That is, we want to make sure that each instance is + # cloned only once, and that if there are multiple references to + # the same original object, we end up with the corresponding + # cloned references all pointing to the same cloned instance. + def __init__(self, **kwargs): + ancestor = kwargs.get('_ancestor') + memo_dict = kwargs.get('_memo') + if memo_dict is None: + # prepare to memoize any recursively instantiated objects + memo_dict = {} + elif ancestor: + # memoize me now to avoid problems with recursive calls + memo_dict[ancestor] = self + + if not ancestor: + ancestor = self.__class__ + ancestor._instantiated = True + # initialize required attributes + self._parent = None self._children = {} + self._ccObject = None # pointer to C++ object + self._instantiated = False # really "cloned" + # Inherit parameter values from class using multidict so # individual value settings can be overridden. - self._values = multidict(self.__class__._values) - # For SimObject-valued parameters, the class should have - # classes (not instances) for the values. We need to - # instantiate these classes rather than just inheriting the - # class object. - for key,val in self.__class__._values.iteritems(): - if isSimObjectClass(val): - setattr(self, key, val(_memo)) - elif isSimObjectClassSequence(val) and len(val): - setattr(self, key, [ v(_memo) for v in val ]) + self._values = multidict(ancestor._values) + # clone SimObject-valued parameters + for key,val in ancestor._values.iteritems(): + if isSimObject(val): + setattr(self, key, val(_memo=memo_dict)) + elif isSimObjectSequence(val) and len(val): + setattr(self, key, [ v(_memo=memo_dict) for v in val ]) + # clone port references. no need to use a multidict here + # since we will be creating new references for all ports. + self._port_map = {} + for key,val in ancestor._port_map.iteritems(): + self._port_map[key] = applyOrMap(val, 'clone', memo_dict) # apply attribute assignments from keyword args, if any for key,val in kwargs.iteritems(): setattr(self, key, val) - # Use this instance as a template to create a new class. - def makeClass(self, memo = {}): - cls = memo.get(self) - if not cls: - cls = self.__class__.makeSubclass(self._values.local) - memo[self] = cls - return cls - - # Direct instantiation of instances (cloning) is no longer - # allowed; must generate class from instance first. + # "Clone" the current instance by creating another instance of + # this instance's class, but that inherits its parameter values + # and port mappings from the current instance. If we're in a + # "deep copy" recursive clone, check the _memo dict to see if + # we've already cloned this instance. def __call__(self, **kwargs): - raise TypeError, "cannot instantiate SimObject; "\ - "use makeClass() to make class first" + memo_dict = kwargs.get('_memo') + if memo_dict is None: + # no memo_dict: must be top-level clone operation. + # this is only allowed at the root of a hierarchy + if self._parent: + raise RuntimeError, "attempt to clone object %s " \ + "not at the root of a tree (parent = %s)" \ + % (self, self._parent) + # create a new dict and use that. + memo_dict = {} + kwargs['_memo'] = memo_dict + elif memo_dict.has_key(self): + # clone already done & memoized + return memo_dict[self] + return self.__class__(_ancestor = self, **kwargs) def __getattr__(self, attr): + if self._ports.has_key(attr): + # return reference that can be assigned to another port + # via __setattr__ + return self._ports[attr].makeRef(self, attr) + if self._values.has_key(attr): return self._values[attr] @@ -457,10 +378,19 @@ class SimObject(object): object.__setattr__(self, attr, value) return + if self._ports.has_key(attr): + # set up port connection + self._ports[attr].connect(self, attr, value) + return + + if isSimObjectOrSequence(value) and self._instantiated: + raise RuntimeError, \ + "cannot set SimObject parameter '%s' after\n" \ + " instance been cloned %s" % (attr, `self`) + # must be SimObject param param = self._params.get(attr, None) if param: - # It's ok: set attribute by delegating to 'object' class. try: value = param.convert(value) except Exception, e: @@ -468,7 +398,6 @@ class SimObject(object): (e, self.__class__.__name__, attr, value) e.args = (msg, ) raise - # I would love to get rid of this elif isSimObjectOrSequence(value): pass else: @@ -507,13 +436,13 @@ class SimObject(object): self._children[name] = value def set_path(self, parent, name): - if not hasattr(self, '_parent'): + if not self._parent: self._parent = parent self._name = name parent.add_child(name, self) def path(self): - if not hasattr(self, '_parent'): + if not self._parent: return 'root' ppath = self._parent.path() if ppath == 'root': @@ -554,6 +483,8 @@ class SimObject(object): def print_ini(self): print '[' + self.path() + ']' # .ini section header + instanceDict[self.path()] = self + if hasattr(self, 'type') and not isinstance(self, ParamContext): print 'type=%s' % self.type @@ -585,6 +516,59 @@ class SimObject(object): for child in child_names: self._children[child].print_ini() + # Call C++ to create C++ object corresponding to this object and + # (recursively) all its children + def createCCObject(self): + self.getCCObject() # force creation + for child in self._children.itervalues(): + child.createCCObject() + + # Get C++ object corresponding to this object, calling C++ if + # necessary to construct it. Does *not* recursively create + # children. + def getCCObject(self): + if not self._ccObject: + self._ccObject = -1 # flag to catch cycles in recursion + self._ccObject = cc_main.createSimObject(self.path()) + elif self._ccObject == -1: + raise RuntimeError, "%s: recursive call to getCCObject()" \ + % self.path() + return self._ccObject + + # Create C++ port connections corresponding to the connections in + # _port_map (& recursively for all children) + def connectPorts(self): + for portRef in self._port_map.itervalues(): + applyOrMap(portRef, 'ccConnect') + for child in self._children.itervalues(): + child.connectPorts() + + def startDrain(self, drain_event, recursive): + count = 0 + # ParamContexts don't serialize + if isinstance(self, SimObject) and not isinstance(self, ParamContext): + count += self._ccObject.drain(drain_event) + if recursive: + for child in self._children.itervalues(): + count += child.startDrain(drain_event, True) + return count + + def resume(self): + if isinstance(self, SimObject) and not isinstance(self, ParamContext): + self._ccObject.resume() + for child in self._children.itervalues(): + child.resume() + + def changeTiming(self, mode): + if isinstance(self, System): + self._ccObject.setMemoryMode(mode) + for child in self._children.itervalues(): + child.changeTiming(mode) + + def takeOverFrom(self, old_cpu): + cpu_ptr = cc_main.convertToBaseCPUPtr(old_cpu._ccObject) + self._ccObject.takeOverFrom(cpu_ptr) + # generate output file for 'dot' to display as a pretty graph. # this code is currently broken. def outputDot(self, dot): @@ -675,9 +659,9 @@ class BaseProxy(object): if self._search_up: while not done: - try: obj = obj._parent - except: break - + obj = obj._parent + if not obj: + break result, done = self.find(obj) if not done: @@ -793,16 +777,16 @@ Self = ProxyFactory(search_self = True, search_up = False) # # Parameter description classes # -# The _params dictionary in each class maps parameter names to -# either a Param or a VectorParam object. These objects contain the +# The _params dictionary in each class maps parameter names to either +# a Param or a VectorParam object. These objects contain the # parameter description string, the parameter type, and the default -# value (loaded from the PARAM section of the .odesc files). The -# _convert() method on these objects is used to force whatever value -# is assigned to the parameter to the appropriate type. +# value (if any). The convert() method on these objects is used to +# force whatever value is assigned to the parameter to the appropriate +# type. # # Note that the default values are loaded into the class's attribute # space when the parameter dictionary is initialized (in -# MetaConfigNode._setparams()); after that point they aren't used. +# MetaSimObject._new_param()); after that point they aren't used. # ##################################################################### @@ -1419,6 +1403,107 @@ MaxAddr = Addr.max MaxTick = Tick.max AllMemory = AddrRange(0, MaxAddr) + +##################################################################### +# +# Port objects +# +# Ports are used to interconnect objects in the memory system. +# +##################################################################### + +# Port reference: encapsulates a reference to a particular port on a +# particular SimObject. +class PortRef(object): + def __init__(self, simobj, name, isVec): + assert(isSimObject(simobj)) + self.simobj = simobj + self.name = name + self.index = -1 + self.isVec = isVec # is this a vector port? + self.peer = None # not associated with another port yet + self.ccConnected = False # C++ port connection done? + + # Set peer port reference. Called via __setattr__ as a result of + # a port assignment, e.g., "obj1.port1 = obj2.port2". + def setPeer(self, other): + if self.isVec: + curMap = self.simobj._port_map.get(self.name, []) + self.index = len(curMap) + curMap.append(other) + else: + curMap = self.simobj._port_map.get(self.name) + if curMap and not self.isVec: + print "warning: overwriting port", self.simobj, self.name + curMap = other + self.simobj._port_map[self.name] = curMap + self.peer = other + + def clone(self, memo): + newRef = copy.copy(self) + assert(isSimObject(newRef.simobj)) + newRef.simobj = newRef.simobj(_memo=memo) + # Tricky: if I'm the *second* PortRef in the pair to be + # cloned, then my peer is still in the middle of its clone + # method, and thus hasn't returned to its owner's + # SimObject.__init__ to get installed in _port_map. As a + # result I have no way of finding the *new* peer object. So I + # mark myself as "waiting" for my peer, and I let the *first* + # PortRef clone call set up both peer pointers after I return. + newPeer = newRef.simobj._port_map.get(self.name) + if newPeer: + if self.isVec: + assert(self.index != -1) + newPeer = newPeer[self.index] + # other guy is all set up except for his peer pointer + assert(newPeer.peer == -1) # peer must be waiting for handshake + newPeer.peer = newRef + newRef.peer = newPeer + else: + # other guy is in clone; just wait for him to do the work + newRef.peer = -1 # mark as waiting for handshake + return newRef + + # Call C++ to create corresponding port connection between C++ objects + def ccConnect(self): + if self.ccConnected: # already done this + return + peer = self.peer + cc_main.connectPorts(self.simobj.getCCObject(), self.name, self.index, + peer.simobj.getCCObject(), peer.name, peer.index) + self.ccConnected = True + peer.ccConnected = True + +# Port description object. Like a ParamDesc object, this represents a +# logical port in the SimObject class, not a particular port on a +# SimObject instance. The latter are represented by PortRef objects. +class Port(object): + def __init__(self, desc): + self.desc = desc + self.isVec = False + + # Generate a PortRef for this port on the given SimObject with the + # given name + def makeRef(self, simobj, name): + return PortRef(simobj, name, self.isVec) + + # Connect an instance of this port (on the given SimObject with + # the given name) with the port described by the supplied PortRef + def connect(self, simobj, name, ref): + if not isinstance(ref, PortRef): + raise TypeError, \ + "assigning non-port reference port '%s'" % name + myRef = self.makeRef(simobj, name) + myRef.setPeer(ref) + ref.setPeer(myRef) + +# VectorPort description object. Like Port, but represents a vector +# of connections (e.g., as on a Bus). +class VectorPort(Port): + def __init__(self, desc): + Port.__init__(self, desc) + self.isVec = True + ##################################################################### # __all__ defines the list of symbols that get exported when @@ -1436,5 +1521,6 @@ __all__ = ['SimObject', 'ParamContext', 'Param', 'VectorParam', 'NetworkBandwidth', 'MemoryBandwidth', 'Range', 'AddrRange', 'MaxAddr', 'MaxTick', 'AllMemory', 'Null', 'NULL', - 'NextEthernetAddr'] + 'NextEthernetAddr', + 'Port', 'VectorPort'] diff --git a/src/python/m5/main.py b/src/python/m5/main.py new file mode 100644 index 000000000..afe73d94c --- /dev/null +++ b/src/python/m5/main.py @@ -0,0 +1,321 @@ +# Copyright (c) 2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nathan Binkert + +import code, optparse, os, socket, sys +from datetime import datetime +from attrdict import attrdict + +try: + import info +except ImportError: + info = None + +__all__ = [ 'options', 'arguments', 'main' ] + +usage="%prog [m5 options] script.py [script options]" +version="%prog 2.0" +brief_copyright=''' +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved +''' + +# there's only one option parsing done, so make it global and add some +# helper functions to make it work well. +parser = optparse.OptionParser(usage=usage, version=version, + description=brief_copyright, + formatter=optparse.TitledHelpFormatter()) +parser.disable_interspersed_args() + +# current option group +group = None + +def set_group(*args, **kwargs): + '''set the current option group''' + global group + if not args and not kwargs: + group = None + else: + group = parser.add_option_group(*args, **kwargs) + +class splitter(object): + def __init__(self, split): + self.split = split + def __call__(self, option, opt_str, value, parser): + getattr(parser.values, option.dest).extend(value.split(self.split)) + +def add_option(*args, **kwargs): + '''add an option to the current option group, or global none set''' + + # if action=split, but allows the option arguments + # themselves to be lists separated by the split variable''' + + if kwargs.get('action', None) == 'append' and 'split' in kwargs: + split = kwargs.pop('split') + kwargs['default'] = [] + kwargs['type'] = 'string' + kwargs['action'] = 'callback' + kwargs['callback'] = splitter(split) + + if group: + return group.add_option(*args, **kwargs) + + return parser.add_option(*args, **kwargs) + +def bool_option(name, default, help): + '''add a boolean option called --name and --no-name. + Display help depending on which is the default''' + + tname = '--%s' % name + fname = '--no-%s' % name + dest = name.replace('-', '_') + if default: + thelp = optparse.SUPPRESS_HELP + fhelp = help + else: + thelp = help + fhelp = optparse.SUPPRESS_HELP + + add_option(tname, action="store_true", default=default, help=thelp) + add_option(fname, action="store_false", dest=dest, help=fhelp) + +# Help options +add_option('-A', "--authors", action="store_true", default=False, + help="Show author information") +add_option('-C', "--copyright", action="store_true", default=False, + help="Show full copyright information") +add_option('-R', "--readme", action="store_true", default=False, + help="Show the readme") +add_option('-N', "--release-notes", action="store_true", default=False, + help="Show the release notes") + +# Options for configuring the base simulator +add_option('-d', "--outdir", metavar="DIR", default=".", + help="Set the output directory to DIR [Default: %default]") +add_option('-i', "--interactive", action="store_true", default=False, + help="Invoke the interactive interpreter after running the script") +add_option("--pdb", action="store_true", default=False, + help="Invoke the python debugger before running the script") +add_option('-p', "--path", metavar="PATH[:PATH]", action='append', split=':', + help="Prepend PATH to the system path when invoking the script") +add_option('-q', "--quiet", action="count", default=0, + help="Reduce verbosity") +add_option('-v', "--verbose", action="count", default=0, + help="Increase verbosity") + +# Statistics options +set_group("Statistics Options") +add_option("--stats-file", metavar="FILE", default="m5stats.txt", + help="Sets the output file for statistics [Default: %default]") + +# Debugging options +set_group("Debugging Options") +add_option("--debug-break", metavar="TIME[,TIME]", action='append', split=',', + help="Cycle to create a breakpoint") + +# Tracing options +set_group("Trace Options") +add_option("--trace-flags", metavar="FLAG[,FLAG]", action='append', split=',', + help="Sets the flags for tracing") +add_option("--trace-start", metavar="TIME", default='0s', + help="Start tracing at TIME (must have units)") +add_option("--trace-file", metavar="FILE", default="cout", + help="Sets the output file for tracing [Default: %default]") +add_option("--trace-circlebuf", metavar="SIZE", type="int", default=0, + help="If SIZE is non-zero, turn on the circular buffer with SIZE lines") +add_option("--no-trace-circlebuf", action="store_const", const=0, + dest='trace_circlebuf', help=optparse.SUPPRESS_HELP) +bool_option("trace-dumponexit", default=False, + help="Dump trace buffer on exit") +add_option("--trace-ignore", metavar="EXPR", action='append', split=':', + help="Ignore EXPR sim objects") + +# Execution Trace options +set_group("Execution Trace Options") +bool_option("speculative", default=True, + help="Don't capture speculative instructions") +bool_option("print-cycle", default=True, + help="Don't print cycle numbers in trace output") +bool_option("print-symbol", default=True, + help="Disable PC symbols in trace output") +bool_option("print-opclass", default=True, + help="Don't print op class type in trace output") +bool_option("print-thread", default=True, + help="Don't print thread number in trace output") +bool_option("print-effaddr", default=True, + help="Don't print effective address in trace output") +bool_option("print-data", default=True, + help="Don't print result data in trace output") +bool_option("print-iregs", default=False, + help="Print fetch sequence numbers in trace output") +bool_option("print-fetch-seq", default=False, + help="Print fetch sequence numbers in trace output") +bool_option("print-cpseq", default=False, + help="Print correct path sequence numbers in trace output") + +options = attrdict() +arguments = [] + +def usage(exitcode=None): + parser.print_help() + if exitcode is not None: + sys.exit(exitcode) + +def parse_args(): + _opts,args = parser.parse_args() + opts = attrdict(_opts.__dict__) + + # setting verbose and quiet at the same time doesn't make sense + if opts.verbose > 0 and opts.quiet > 0: + usage(2) + + # store the verbosity in a single variable. 0 is default, + # negative numbers represent quiet and positive values indicate verbose + opts.verbose -= opts.quiet + + del opts.quiet + + options.update(opts) + arguments.extend(args) + return opts,args + +def main(): + import cc_main + + parse_args() + + done = False + if options.copyright: + done = True + print info.LICENSE + print + + if options.authors: + done = True + print 'Author information:' + print + print info.AUTHORS + print + + if options.readme: + done = True + print 'Readme:' + print + print info.README + print + + if options.release_notes: + done = True + print 'Release Notes:' + print + print info.RELEASE_NOTES + print + + if done: + sys.exit(0) + + if options.verbose >= 0: + print "M5 Simulator System" + print brief_copyright + print + print "M5 compiled %s" % cc_main.cvar.compileDate; + print "M5 started %s" % datetime.now().ctime() + print "M5 executing on %s" % socket.gethostname() + + # check to make sure we can find the listed script + if not arguments or not os.path.isfile(arguments[0]): + usage(2) + + # tell C++ about output directory + cc_main.setOutputDir(options.outdir) + + # update the system path with elements from the -p option + sys.path[0:0] = options.path + + import objects + + # set stats options + objects.Statistics.text_file = options.stats_file + + # set debugging options + objects.Debug.break_cycles = options.debug_break + + # set tracing options + objects.Trace.flags = options.trace_flags + objects.Trace.start = options.trace_start + objects.Trace.file = options.trace_file + objects.Trace.bufsize = options.trace_circlebuf + objects.Trace.dump_on_exit = options.trace_dumponexit + objects.Trace.ignore = options.trace_ignore + + # set execution trace options + objects.ExecutionTrace.speculative = options.speculative + objects.ExecutionTrace.print_cycle = options.print_cycle + objects.ExecutionTrace.pc_symbol = options.print_symbol + objects.ExecutionTrace.print_opclass = options.print_opclass + objects.ExecutionTrace.print_thread = options.print_thread + objects.ExecutionTrace.print_effaddr = options.print_effaddr + objects.ExecutionTrace.print_data = options.print_data + objects.ExecutionTrace.print_iregs = options.print_iregs + objects.ExecutionTrace.print_fetchseq = options.print_fetch_seq + objects.ExecutionTrace.print_cpseq = options.print_cpseq + + sys.argv = arguments + sys.path = [ os.path.dirname(sys.argv[0]) ] + sys.path + + scope = { '__file__' : sys.argv[0] } + + # we want readline if we're doing anything interactive + if options.interactive or options.pdb: + exec("import readline", scope) + + # if pdb was requested, execfile the thing under pdb, otherwise, + # just do the execfile normally + if options.pdb: + from pdb import Pdb + debugger = Pdb() + debugger.run('execfile("%s")' % sys.argv[0], scope) + else: + execfile(sys.argv[0], scope) + + # once the script is done + if options.interactive: + interact = code.InteractiveConsole(scope) + interact.interact("M5 Interactive Console") + +if __name__ == '__main__': + from pprint import pprint + + parse_args() + + print 'opts:' + pprint(options, indent=4) + print + + print 'args:' + pprint(arguments, indent=4) diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py index 2e78578df..5bf98be9c 100644 --- a/src/python/m5/objects/BaseCPU.py +++ b/src/python/m5/objects/BaseCPU.py @@ -6,10 +6,10 @@ class BaseCPU(SimObject): abstract = True mem = Param.MemObject("memory") + system = Param.System(Parent.any, "system object") if build_env['FULL_SYSTEM']: dtb = Param.AlphaDTB("Data TLB") itb = Param.AlphaITB("Instruction TLB") - system = Param.System(Parent.any, "system object") cpu_id = Param.Int(-1, "CPU identifier") else: workload = VectorParam.Process("processes to run") diff --git a/src/python/m5/objects/BaseCache.py b/src/python/m5/objects/BaseCache.py index 33f44759b..497b2b038 100644 --- a/src/python/m5/objects/BaseCache.py +++ b/src/python/m5/objects/BaseCache.py @@ -1,29 +1,26 @@ from m5.config import * -from BaseMem import BaseMem +from MemObject import MemObject class Prefetch(Enum): vals = ['none', 'tagged', 'stride', 'ghb'] -class BaseCache(BaseMem): +class BaseCache(MemObject): type = 'BaseCache' adaptive_compression = Param.Bool(False, "Use an adaptive compression scheme") assoc = Param.Int("associativity") block_size = Param.Int("block size in bytes") + latency = Param.Int("Latency") compressed_bus = Param.Bool(False, "This cache connects to a compressed memory") compression_latency = Param.Latency('0ns', "Latency in cycles of compression algorithm") do_copy = Param.Bool(False, "perform fast copies in the cache") hash_delay = Param.Int(1, "time in cycles of hash access") - in_bus = Param.Bus(NULL, "incoming bus object") lifo = Param.Bool(False, "whether this NIC partition should use LIFO repl. policy") max_miss_count = Param.Counter(0, "number of misses to handle before calling exit") - mem_trace = Param.MemTraceWriter(NULL, - "memory trace writer to record accesses") mshrs = Param.Int("number of MSHRs (max outstanding requests)") - out_bus = Param.Bus("outgoing bus object") prioritizeRequests = Param.Bool(False, "always service demand misses first") protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use") @@ -63,3 +60,6 @@ class BaseCache(BaseMem): "Use the CPU ID to seperate calculations of prefetches") prefetch_data_accesses_only = Param.Bool(False, "Only prefetch on data not on instruction accesses") + hit_latency = Param.Int(1,"Hit Latency of the cache") + cpu_side = Port("Port on side closer to CPU") + mem_side = Port("Port on side closer to MEM") diff --git a/src/python/m5/objects/Bridge.py b/src/python/m5/objects/Bridge.py index 880535755..c9e673afb 100644 --- a/src/python/m5/objects/Bridge.py +++ b/src/python/m5/objects/Bridge.py @@ -3,6 +3,8 @@ from MemObject import MemObject class Bridge(MemObject): type = 'Bridge' + side_a = Port('Side A port') + side_b = Port('Side B port') queue_size_a = Param.Int(16, "The number of requests to buffer") queue_size_b = Param.Int(16, "The number of requests to buffer") delay = Param.Latency('0ns', "The latency of this bridge") diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py index c37dab438..e0278e6c3 100644 --- a/src/python/m5/objects/Bus.py +++ b/src/python/m5/objects/Bus.py @@ -3,4 +3,6 @@ from MemObject import MemObject class Bus(MemObject): type = 'Bus' + port = VectorPort("vector port for connecting devices") + default = Port("Default port for requests that aren't handeled by a device.") bus_id = Param.Int(0, "blah") diff --git a/src/python/m5/objects/Device.py b/src/python/m5/objects/Device.py index 7798f5f04..222f750da 100644 --- a/src/python/m5/objects/Device.py +++ b/src/python/m5/objects/Device.py @@ -4,6 +4,7 @@ from MemObject import MemObject class PioDevice(MemObject): type = 'PioDevice' abstract = True + pio = Port("Programmed I/O port") platform = Param.Platform(Parent.any, "Platform this device is part of") system = Param.System(Parent.any, "System this device is part of") @@ -16,3 +17,4 @@ class BasicPioDevice(PioDevice): class DmaDevice(PioDevice): type = 'DmaDevice' abstract = True + dma = Port("DMA port") diff --git a/src/python/m5/objects/FuncUnit.py b/src/python/m5/objects/FuncUnit.py new file mode 100644 index 000000000..f61590ae9 --- /dev/null +++ b/src/python/m5/objects/FuncUnit.py @@ -0,0 +1,17 @@ +from m5.config import * + +class OpType(Enum): + vals = ['(null)', 'IntAlu', 'IntMult', 'IntDiv', 'FloatAdd', + 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', 'FloatSqrt', + 'MemRead', 'MemWrite', 'IprAccess', 'InstPrefetch'] + +class OpDesc(SimObject): + type = 'OpDesc' + issueLat = Param.Int(1, "cycles until another can be issued") + opClass = Param.OpType("type of operation") + opLat = Param.Int(1, "cycles until result is available") + +class FUDesc(SimObject): + type = 'FUDesc' + count = Param.Int("number of these FU's available") + opList = VectorParam.OpDesc("operation classes for this FU type") diff --git a/src/python/m5/objects/AlphaFullCPU.py b/src/python/m5/objects/O3CPU.py index 2988305d3..d6bc454ad 100644 --- a/src/python/m5/objects/AlphaFullCPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -2,14 +2,16 @@ from m5 import build_env from m5.config import * from BaseCPU import BaseCPU -class DerivAlphaFullCPU(BaseCPU): - type = 'DerivAlphaFullCPU' +class DerivO3CPU(BaseCPU): + type = 'DerivO3CPU' activity = Param.Unsigned("Initial count") numThreads = Param.Unsigned("number of HW thread contexts") checker = Param.BaseCPU(NULL, "checker") cachePorts = Param.Unsigned("Cache Ports") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") renameToFetchDelay = Param.Unsigned("Rename to fetch delay") @@ -37,12 +39,10 @@ class DerivAlphaFullCPU(BaseCPU): "Issue/Execute/Writeback delay") issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " "to the IEW stage)") + dispatchWidth = Param.Unsigned("Dispatch width") issueWidth = Param.Unsigned("Issue width") - executeWidth = Param.Unsigned("Execute width") - executeIntWidth = Param.Unsigned("Integer execute width") - executeFloatWidth = Param.Unsigned("Floating point execute width") - executeBranchWidth = Param.Unsigned("Branch execute width") - executeMemoryWidth = Param.Unsigned("Memory execute width") + wbWidth = Param.Unsigned("Writeback width") + wbDepth = Param.Unsigned("Writeback depth") fuPool = Param.FUPool(NULL, "Functional Unit pool") iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " @@ -53,6 +53,9 @@ class DerivAlphaFullCPU(BaseCPU): trapLatency = Param.Tick("Trap latency") fetchTrapLatency = Param.Tick("Fetch trap latency") + backComSize = Param.Unsigned("Time buffer size for backwards communication") + forwardComSize = Param.Unsigned("Time buffer size for forward communication") + predType = Param.String("Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") diff --git a/src/python/m5/objects/OzoneCPU.py b/src/python/m5/objects/OzoneCPU.py index f2d9aea84..88fb63c74 100644 --- a/src/python/m5/objects/OzoneCPU.py +++ b/src/python/m5/objects/OzoneCPU.py @@ -7,11 +7,11 @@ class DerivOzoneCPU(BaseCPU): numThreads = Param.Unsigned("number of HW thread contexts") - if not build_env['FULL_SYSTEM']: - mem = Param.FunctionalMemory(NULL, "memory") - checker = Param.BaseCPU("Checker CPU") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") + width = Param.Unsigned("Width") frontEndWidth = Param.Unsigned("Front end width") backEndWidth = Param.Unsigned("Back end width") diff --git a/src/python/m5/objects/Pci.py b/src/python/m5/objects/Pci.py index 9e1e91b13..29014bb37 100644 --- a/src/python/m5/objects/Pci.py +++ b/src/python/m5/objects/Pci.py @@ -1,5 +1,5 @@ from m5.config import * -from Device import BasicPioDevice, DmaDevice +from Device import BasicPioDevice, DmaDevice, PioDevice class PciConfigData(SimObject): type = 'PciConfigData' @@ -38,18 +38,22 @@ class PciConfigData(SimObject): MaximumLatency = Param.UInt8(0x00, "Maximum Latency") MinimumGrant = Param.UInt8(0x00, "Minimum Grant") -class PciConfigAll(BasicPioDevice): +class PciConfigAll(PioDevice): type = 'PciConfigAll' + pio_latency = Param.Tick(1, "Programmed IO latency in simticks") + bus = Param.UInt8(0x00, "PCI bus to act as config space for") + size = Param.MemorySize32('16MB', "Size of config space") + class PciDevice(DmaDevice): type = 'PciDevice' abstract = True + config = Port("PCI configuration space port") pci_bus = Param.Int("PCI bus") pci_dev = Param.Int("PCI device number") pci_func = Param.Int("PCI function code") pio_latency = Param.Tick(1, "Programmed IO latency in simticks") configdata = Param.PciConfigData(Parent.any, "PCI Config data") - configspace = Param.PciConfigAll(Parent.any, "PCI Configspace") class PciFake(PciDevice): type = 'PciFake' diff --git a/src/python/m5/objects/PhysicalMemory.py b/src/python/m5/objects/PhysicalMemory.py index bed90d555..9cc7510a2 100644 --- a/src/python/m5/objects/PhysicalMemory.py +++ b/src/python/m5/objects/PhysicalMemory.py @@ -3,6 +3,7 @@ from MemObject import * class PhysicalMemory(MemObject): type = 'PhysicalMemory' + port = Port("the access port") range = Param.AddrRange("Device Address") file = Param.String('', "memory mapped file") latency = Param.Latency(Parent.clock, "latency of an access") diff --git a/src/python/m5/objects/System.py b/src/python/m5/objects/System.py index 9a1e1d690..386f39277 100644 --- a/src/python/m5/objects/System.py +++ b/src/python/m5/objects/System.py @@ -1,9 +1,12 @@ from m5 import build_env from m5.config import * +class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing'] + class System(SimObject): type = 'System' physmem = Param.PhysicalMemory(Parent.any, "phsyical memory") + mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in") if build_env['FULL_SYSTEM']: boot_cpu_frequency = Param.Frequency(Self.cpu[0].clock.frequency, "boot processor frequency") diff --git a/src/sim/builder.cc b/src/sim/builder.cc index 121275c83..9074cc899 100644 --- a/src/sim/builder.cc +++ b/src/sim/builder.cc @@ -33,17 +33,14 @@ #include "base/inifile.hh" #include "base/misc.hh" #include "sim/builder.hh" -#include "sim/configfile.hh" -#include "sim/config_node.hh" #include "sim/host.hh" #include "sim/sim_object.hh" #include "sim/root.hh" using namespace std; -SimObjectBuilder::SimObjectBuilder(ConfigNode *_configNode) - : ParamContext(_configNode->getPath(), NoAutoInit), - configNode(_configNode) +SimObjectBuilder::SimObjectBuilder(const std::string &_iniSection) + : ParamContext(_iniSection, NoAutoInit) { } @@ -78,8 +75,7 @@ SimObjectBuilder::parseParams(IniFile &iniFile) void SimObjectBuilder::printErrorProlog(ostream &os) { - ccprintf(os, "Error creating object '%s' of type '%s':\n", - iniSection, configNode->getType()); + ccprintf(os, "Error creating object '%s':\n", iniSection); } @@ -112,9 +108,13 @@ SimObjectClass::SimObjectClass(const string &className, CreateFunc createFunc) // // SimObject * -SimObjectClass::createObject(IniFile &configDB, ConfigNode *configNode) +SimObjectClass::createObject(IniFile &configDB, const std::string &iniSection) { - const string &type = configNode->getType(); + string type; + if (!configDB.find(iniSection, "type", type)) { + // no C++ type associated with this object + return NULL; + } // look up className to get appropriate createFunc if (classMap->find(type) == classMap->end()) @@ -125,7 +125,7 @@ SimObjectClass::createObject(IniFile &configDB, ConfigNode *configNode) // call createFunc with config hierarchy node to get object // builder instance (context with parameters for object creation) - SimObjectBuilder *objectBuilder = (*createFunc)(configNode); + SimObjectBuilder *objectBuilder = (*createFunc)(iniSection); assert(objectBuilder != NULL); @@ -166,7 +166,7 @@ SimObjectClass::describeAllClasses(ostream &os) os << "[" << className << "]\n"; // create dummy object builder just to instantiate parameters - SimObjectBuilder *objectBuilder = (*createFunc)(NULL); + SimObjectBuilder *objectBuilder = (*createFunc)(""); // now get the object builder to describe ite params objectBuilder->describeParams(os); diff --git a/src/sim/builder.hh b/src/sim/builder.hh index 8d0846155..2997fe5c3 100644 --- a/src/sim/builder.hh +++ b/src/sim/builder.hh @@ -55,14 +55,8 @@ class SimObject; // class SimObjectBuilder : public ParamContext { - private: - // The corresponding node in the configuration hierarchy. - // (optional: may be null if the created object is not in the - // hierarchy) - ConfigNode *configNode; - public: - SimObjectBuilder(ConfigNode *_configNode); + SimObjectBuilder(const std::string &_iniSection); virtual ~SimObjectBuilder(); @@ -77,9 +71,6 @@ class SimObjectBuilder : public ParamContext // configuration hierarchy node label and position) virtual const std::string &getInstanceName() { return iniSection; } - // return the configuration hierarchy node for this context. - virtual ConfigNode *getConfigNode() { return configNode; } - // Create the actual SimObject corresponding to the parameter // values in this context. This function is overridden in derived // classes to call a specific constructor for a particular @@ -125,7 +116,7 @@ class SimObjectClass // for the object (specified by the second string argument), and // an optional config hierarchy node (specified by the third // argument). A pointer to the new SimObjectBuilder is returned. - typedef SimObjectBuilder *(*CreateFunc)(ConfigNode *configNode); + typedef SimObjectBuilder *(*CreateFunc)(const std::string &iniSection); static std::map<std::string,CreateFunc> *classMap; @@ -137,7 +128,8 @@ class SimObjectClass // create SimObject given name of class and pointer to // configuration hierarchy node - static SimObject *createObject(IniFile &configDB, ConfigNode *configNode); + static SimObject *createObject(IniFile &configDB, + const std::string &iniSection); // print descriptions of all parameters registered with all // SimObject classes @@ -156,15 +148,15 @@ class OBJ_CLASS##Builder : public SimObjectBuilder \ #define END_DECLARE_SIM_OBJECT_PARAMS(OBJ_CLASS) \ \ - OBJ_CLASS##Builder(ConfigNode *configNode); \ + OBJ_CLASS##Builder(const std::string &iniSection); \ virtual ~OBJ_CLASS##Builder() {} \ \ OBJ_CLASS *create(); \ }; #define BEGIN_INIT_SIM_OBJECT_PARAMS(OBJ_CLASS) \ -OBJ_CLASS##Builder::OBJ_CLASS##Builder(ConfigNode *configNode) \ - : SimObjectBuilder(configNode), + OBJ_CLASS##Builder::OBJ_CLASS##Builder(const std::string &iSec) \ + : SimObjectBuilder(iSec), #define END_INIT_SIM_OBJECT_PARAMS(OBJ_CLASS) \ @@ -176,9 +168,9 @@ OBJ_CLASS *OBJ_CLASS##Builder::create() #define REGISTER_SIM_OBJECT(CLASS_NAME, OBJ_CLASS) \ SimObjectBuilder * \ -new##OBJ_CLASS##Builder(ConfigNode *configNode) \ +new##OBJ_CLASS##Builder(const std::string &iniSection) \ { \ - return new OBJ_CLASS##Builder(configNode); \ + return new OBJ_CLASS##Builder(iniSection); \ } \ \ SimObjectClass the##OBJ_CLASS##Class(CLASS_NAME, \ diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh index a3138a25e..f1f244150 100644 --- a/src/sim/byteswap.hh +++ b/src/sim/byteswap.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Gabe Black + * Authors: Ali Saidi + * Nathan Binkert */ //The purpose of this file is to provide endainness conversion utility @@ -35,6 +36,7 @@ #ifndef __SIM_BYTE_SWAP_HH__ #define __SIM_BYTE_SWAP_HH__ +#include "base/misc.hh" #include "sim/host.hh" // This lets us figure out what the byte order of the host system is @@ -48,6 +50,10 @@ #include <machine/endian.h> #endif +#if defined(__APPLE__) +#include <libkern/OSByteOrder.h> +#endif + //These functions actually perform the swapping for parameters //of various bit lengths static inline uint64_t @@ -55,6 +61,8 @@ swap_byte64(uint64_t x) { #if defined(linux) return bswap_64(x); +#elif defined(__APPLE__) + return OSSwapInt64(x); #else return (uint64_t)((((uint64_t)(x) & 0xff) << 56) | ((uint64_t)(x) & 0xff00ULL) << 40 | @@ -72,6 +80,8 @@ swap_byte32(uint32_t x) { #if defined(linux) return bswap_32(x); +#elif defined(__APPLE__) + return OSSwapInt32(x); #else return (uint32_t)(((uint32_t)(x) & 0xff) << 24 | ((uint32_t)(x) & 0xff00) << 8 | ((uint32_t)(x) & 0xff0000) >> 8 | @@ -84,31 +94,31 @@ swap_byte16(uint16_t x) { #if defined(linux) return bswap_16(x); +#elif defined(__APPLE__) + return OSSwapInt16(x); #else return (uint16_t)(((uint16_t)(x) & 0xff) << 8 | ((uint16_t)(x) & 0xff00) >> 8); #endif } -//This lets the compiler figure out how to call the swap_byte functions above -//for different data types. -static inline uint64_t swap_byte(uint64_t x) {return swap_byte64(x);} -static inline int64_t swap_byte(int64_t x) {return swap_byte64((uint64_t)x);} -static inline uint32_t swap_byte(uint32_t x) {return swap_byte32(x);} -static inline int32_t swap_byte(int32_t x) {return swap_byte32((uint32_t)x);} -//This is to prevent the following two functions from compiling on -//64bit machines. It won't detect everything, so it should be changed. -#ifndef __x86_64__ -static inline long swap_byte(long x) {return swap_byte32((long)x);} -static inline unsigned long swap_byte(unsigned long x) - { return swap_byte32((unsigned long)x);} -#endif -static inline uint16_t swap_byte(uint16_t x) {return swap_byte32(x);} -static inline int16_t swap_byte(int16_t x) {return swap_byte16((uint16_t)x);} -static inline uint8_t swap_byte(uint8_t x) {return x;} -static inline int8_t swap_byte(int8_t x) {return x;} -static inline double swap_byte(double x) {return swap_byte64((uint64_t)x);} -static inline float swap_byte(float x) {return swap_byte32((uint32_t)x);} +// This function lets the compiler figure out how to call the +// swap_byte functions above for different data types. Since the +// sizeof() values are known at compiel time, it should inline to a +// direct call to the right swap_byteNN() function. +template <typename T> +static inline T swap_byte(T x) { + if (sizeof(T) == 8) + return swap_byte64((uint64_t)x); + else if (sizeof(T) == 4) + return swap_byte32((uint32_t)x); + else if (sizeof(T) == 2) + return swap_byte16((uint16_t)x); + else if (sizeof(T) == 1) + return x; + else + panic("Can't byte-swap values larger than 64 bits"); +} //The conversion functions with fixed endianness on both ends don't need to //be in a namespace diff --git a/src/sim/debug.cc b/src/sim/debug.cc index b82219f7d..be9566836 100644 --- a/src/sim/debug.cc +++ b/src/sim/debug.cc @@ -127,12 +127,12 @@ DebugContext::checkParams() // handy function to schedule DebugBreakEvent on main event queue // (callable from debugger) // -extern "C" void sched_break_cycle(Tick when) +void sched_break_cycle(Tick when) { new DebugBreakEvent(&mainEventQueue, when); } -extern "C" void eventq_dump() +void eventq_dump() { mainEventQueue.dump(); } diff --git a/src/sim/faults.hh b/src/sim/faults.hh index 23385c649..00264d8fc 100644 --- a/src/sim/faults.hh +++ b/src/sim/faults.hh @@ -54,11 +54,7 @@ class FaultBase : public RefCounted { public: virtual FaultName name() = 0; -#if FULL_SYSTEM virtual void invoke(ThreadContext * tc); -#else - virtual void invoke(ThreadContext * tc); -#endif // template<typename T> // bool isA() {return dynamic_cast<T *>(this);} virtual bool isMachineCheckFault() {return false;} diff --git a/src/sim/main.cc b/src/sim/main.cc index 741926056..d0725ab37 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -41,7 +41,7 @@ #include <libgen.h> #include <stdlib.h> #include <signal.h> -#include <unistd.h> +#include <getopt.h> #include <list> #include <string> @@ -57,10 +57,12 @@ #include "base/time.hh" #include "cpu/base.hh" #include "cpu/smt.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" #include "sim/async.hh" #include "sim/builder.hh" -#include "sim/configfile.hh" #include "sim/host.hh" +#include "sim/serialize.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" #include "sim/sim_object.hh" @@ -113,40 +115,11 @@ abortHandler(int sigtype) #endif } - -const char *briefCopyright = -"Copyright (c) 2001-2006\n" -"The Regents of The University of Michigan\n" -"All Rights Reserved\n"; - -/// Print welcome message. -void -sayHello(ostream &out) -{ - extern const char *compileDate; // from date.cc - - ccprintf(out, "M5 Simulator System\n"); - // display copyright - ccprintf(out, "%s\n", briefCopyright); - ccprintf(out, "M5 compiled %d\n", compileDate); - ccprintf(out, "M5 started %s\n", Time::start); - - char *host = getenv("HOSTNAME"); - if (!host) - host = getenv("HOST"); - - if (host) - ccprintf(out, "M5 executing on %s\n", host); -} - - -extern "C" { void init_main(); } +extern "C" { void init_cc_main(); } int main(int argc, char **argv) { - sayHello(cerr); - signal(SIGFPE, SIG_IGN); // may occur on misspeculated paths signal(SIGTRAP, SIG_IGN); signal(SIGUSR1, dumpStatsHandler); // dump intermediate stats @@ -157,119 +130,145 @@ main(int argc, char **argv) Py_SetProgramName(argv[0]); // default path to m5 python code is the currently executing - // file... Python ZipImporter will find embedded zip archive - char *pythonpath = argv[0]; - - bool interactive = false; - bool getopt_done = false; - do { - switch (getopt(argc, argv, "+p:i")) { - // -p <path> prepends <path> to PYTHONPATH instead of - // using built-in zip archive. Useful when - // developing/debugging changes to built-in Python - // libraries, as the new Python can be tested without - // building a new m5 binary. - case 'p': - pythonpath = optarg; - break; - - // -i forces entry into interactive mode after the - // supplied script is executed (just like the -i option to - // the Python interpreter). - case 'i': - interactive = true; - break; - - case -1: - getopt_done = true; - break; - - default: - fatal("Unrecognized option %c\n", optopt); - } - } while (!getopt_done); - - // Fix up argc & argv to hide arguments we just processed. - // getopt() sets optind to the index of the first non-processed - // argv element. - argc -= optind; - argv += optind; - - // Set up PYTHONPATH to make sure the m5 module is found - string newpath(pythonpath); + // file... Python ZipImporter will find embedded zip archive. + // The M5_ARCHIVE environment variable can be used to override this. + char *m5_archive = getenv("M5_ARCHIVE"); + string pythonpath = m5_archive ? m5_archive : argv[0]; char *oldpath = getenv("PYTHONPATH"); if (oldpath != NULL) { - newpath += ":"; - newpath += oldpath; + pythonpath += ":"; + pythonpath += oldpath; } - if (setenv("PYTHONPATH", newpath.c_str(), true) == -1) + if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1) fatal("setenv: %s\n", strerror(errno)); // initialize embedded Python interpreter Py_Initialize(); PySys_SetArgv(argc, argv); - // initialize SWIG 'main' module - init_main(); + // initialize SWIG 'cc_main' module + init_cc_main(); - if (argc > 0) { - // extra arg(s): first is script file, remaining ones are args - // to script file - char *filename = argv[0]; - FILE *fp = fopen(filename, "r"); - if (!fp) { - fatal("cannot open file '%s'\n", filename); - } + PyRun_SimpleString("import m5"); + PyRun_SimpleString("m5.main()"); - PyRun_AnyFile(fp, filename); - } else { - // no script file argument... force interactive prompt - interactive = true; - } + // clean up Python intepreter. + Py_Finalize(); +} + + +void +setOutputDir(const string &dir) +{ + simout.setDirectory(dir); +} + + +IniFile inifile; + +SimObject * +createSimObject(const string &name) +{ + return SimObjectClass::createObject(inifile, name); +} - if (interactive) { - // The following code to import readline was copied from Python - // 2.4.3's Modules/main.c. - // Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 - // Python Software Foundation; All Rights Reserved - // We should only enable this if we're actually using an - // interactive prompt. - PyObject *v; - v = PyImport_ImportModule("readline"); - if (v == NULL) - PyErr_Clear(); - else - Py_DECREF(v); - - PyRun_InteractiveLoop(stdin, "stdin"); + +/** + * Pointer to the Python function that maps names to SimObjects. + */ +PyObject *resolveFunc = NULL; + +/** + * Convert a pointer to the Python object that SWIG wraps around a C++ + * SimObject pointer back to the actual C++ pointer. See main.i. + */ +extern "C" SimObject *convertSwigSimObjectPtr(PyObject *); + + +SimObject * +resolveSimObject(const string &name) +{ + PyObject *pyPtr = PyEval_CallFunction(resolveFunc, "(s)", name.c_str()); + if (pyPtr == NULL) { + PyErr_Print(); + panic("resolveSimObject: failure on call to Python for %s", name); } - // clean up Python intepreter. - Py_Finalize(); + SimObject *simObj = convertSwigSimObjectPtr(pyPtr); + if (simObj == NULL) + panic("resolveSimObject: failure on pointer conversion for %s", name); + + return simObj; } -/// Initialize C++ configuration. Exported to Python via SWIG; invoked -/// from m5.instantiate(). +/** + * Load config.ini into C++ database. Exported to Python via SWIG; + * invoked from m5.instantiate(). + */ void -initialize() +loadIniFile(PyObject *_resolveFunc) { + resolveFunc = _resolveFunc; configStream = simout.find("config.out"); // The configuration database is now complete; start processing it. - IniFile inifile; inifile.load("config.ini"); // Initialize statistics database Stats::InitSimStats(); +} - // Now process the configuration hierarchy and create the SimObjects. - ConfigHierarchy configHierarchy(inifile); - configHierarchy.build(); - configHierarchy.createSimObjects(); +/** + * Look up a MemObject port. Helper function for connectPorts(). + */ +Port * +lookupPort(SimObject *so, const std::string &name, int i) +{ + MemObject *mo = dynamic_cast<MemObject *>(so); + if (mo == NULL) { + warn("error casting SimObject %s to MemObject", so->name()); + return NULL; + } + + Port *p = mo->getPort(name, i); + if (p == NULL) + warn("error looking up port %s on object %s", name, so->name()); + return p; +} + + +/** + * Connect the described MemObject ports. Called from Python via SWIG. + */ +int +connectPorts(SimObject *o1, const std::string &name1, int i1, + SimObject *o2, const std::string &name2, int i2) +{ + Port *p1 = lookupPort(o1, name1, i1); + Port *p2 = lookupPort(o2, name2, i2); + + if (p1 == NULL || p2 == NULL) { + warn("connectPorts: port lookup error"); + return 0; + } + + p1->setPeer(p2); + p2->setPeer(p1); + + return 1; +} + +/** + * Do final initialization steps after object construction but before + * start of simulation. + */ +void +finalInit() +{ // Parse and check all non-config-hierarchy parameters. ParamContext::parseAllContexts(inifile); ParamContext::checkAllContexts(); @@ -277,28 +276,16 @@ initialize() // Echo all parameter settings to stats file as well. ParamContext::showAllContexts(*configStream); - // Any objects that can't connect themselves until after construction should - // do so now - SimObject::connectAll(); - // Do a second pass to finish initializing the sim objects SimObject::initAll(); // Restore checkpointed state, if any. +#if 0 configHierarchy.unserializeSimObjects(); - - // Done processing the configuration database. - // Check for unreferenced entries. - if (inifile.printUnreferenced()) - panic("unreferenced sections/entries in the intermediate ini file"); +#endif SimObject::regAllStats(); - // uncomment the following to get PC-based execution-time profile -#ifdef DO_PROFILE - init_profile((char *)&_init, (char *)&_fini); -#endif - // Check to make sure that the stats package is properly initialized Stats::check(); @@ -393,6 +380,37 @@ simulate(Tick num_cycles = -1) // not reached... only exit is return on SimLoopExitEvent } +Event * +createCountedDrain() +{ + return new CountedDrainEvent(); +} + +void +cleanupCountedDrain(Event *counted_drain) +{ + CountedDrainEvent *event = + dynamic_cast<CountedDrainEvent *>(counted_drain); + if (event == NULL) { + fatal("Called cleanupCountedDrain() on an event that was not " + "a CountedDrainEvent."); + } + assert(event->getCount() == 0); + delete event; +} + +void +serializeAll(const std::string &cpt_dir) +{ + Serializable::serializeAll(cpt_dir); +} + +void +unserializeAll(const std::string &cpt_dir) +{ + Serializable::unserializeAll(cpt_dir); +} + /** * Queue of C++ callbacks to invoke on simulator exit. */ @@ -407,6 +425,16 @@ registerExitCallback(Callback *callback) exitCallbacks.add(callback); } +BaseCPU * +convertToBaseCPUPtr(SimObject *obj) +{ + BaseCPU *ptr = dynamic_cast<BaseCPU *>(obj); + + if (ptr == NULL) + warn("Casting to BaseCPU pointer failed"); + return ptr; +} + /** * Do C++ simulator exit processing. Exported to SWIG to be invoked * when simulator terminates via Python's atexit mechanism. diff --git a/src/sim/param.cc b/src/sim/param.cc index 7f648b8e1..b1c50946b 100644 --- a/src/sim/param.cc +++ b/src/sim/param.cc @@ -39,8 +39,6 @@ #include "base/range.hh" #include "base/str.hh" #include "base/trace.hh" -#include "sim/config_node.hh" -#include "sim/configfile.hh" #include "sim/param.hh" #include "sim/sim_object.hh" @@ -521,7 +519,9 @@ parseSimObjectParam(ParamContext *context, const string &s, SimObject *&value) obj = NULL; } else { - obj = context->resolveSimObject(s); + // defined in main.cc + extern SimObject *resolveSimObject(const string &); + obj = resolveSimObject(s); if (obj == NULL) return false; @@ -696,22 +696,6 @@ ParamContext::printErrorProlog(ostream &os) } // -// Resolve an object name to a SimObject pointer. The object will be -// created as a side-effect if necessary. If the name contains a -// colon (e.g., "iq:IQ"), then the object is local (invisible to -// outside this context). If there is no colon, the name needs to be -// resolved through the configuration hierarchy (only possible for -// SimObjectBuilder objects, which return non-NULL for configNode()). -// -SimObject * -ParamContext::resolveSimObject(const string &name) -{ - ConfigNode *n = getConfigNode(); - return n ? n->resolveSimObject(name) : NULL; -} - - -// // static method: call parseParams() on all registered contexts // void diff --git a/src/sim/param.hh b/src/sim/param.hh index 49db17df9..1bc55c125 100644 --- a/src/sim/param.hh +++ b/src/sim/param.hh @@ -36,10 +36,10 @@ #include <string> #include <vector> -#include "sim/configfile.hh" #include "sim/startup.hh" // forward decls +class IniFile; class BaseParam; class SimObject; @@ -132,18 +132,10 @@ class ParamContext : protected StartupCallback // print context information for parameter error virtual void printErrorProlog(std::ostream &); - // resolve a SimObject name in this context to an object pointer. - virtual SimObject *resolveSimObject(const std::string &name); - // generate the name for this instance of this context (used as a // prefix to create unique names in resolveSimObject() virtual const std::string &getInstanceName() { return iniSection; } - // return the configuration hierarchy node for this context. Bare - // ParamContext objects have no corresponding node, so the default - // implementation returns NULL. - virtual ConfigNode *getConfigNode() { return NULL; } - // Parse all parameters registered with all ParamContext objects. static void parseAllContexts(IniFile &iniFile); diff --git a/src/sim/process.cc b/src/sim/process.cc index 5080c3ac1..f989300a3 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -326,11 +326,10 @@ LiveProcess::argsInit(int intSize, int pageSize) // set bottom of stack stack_min = stack_base - space_needed; // align it - stack_min &= ~(intSize-1); + stack_min = roundDown(stack_min, pageSize); stack_size = stack_base - stack_min; // map memory - pTable->allocate(roundDown(stack_min, pageSize), - roundUp(stack_size, pageSize)); + pTable->allocate(stack_min, roundUp(stack_size, pageSize)); // map out initial stack contents Addr argv_array_base = stack_min + intSize; // room for argc @@ -359,7 +358,10 @@ LiveProcess::argsInit(int intSize, int pageSize) Addr prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); + +#if THE_ISA != ALPHA_ISA //e.g. MIPS or Sparc threadContexts[0]->setNextNPC(prog_entry + (2 * sizeof(MachInst))); +#endif num_processes++; } diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index ae52cdd41..fcf0b957a 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -37,7 +37,6 @@ #include "sim/pseudo_inst.hh" #include "arch/vtophys.hh" #include "cpu/base.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/thread_context.hh" #include "cpu/quiesce_event.hh" #include "kern/kernel_stats.hh" @@ -52,8 +51,6 @@ using namespace std; -extern Sampler *SampCPU; - using namespace Stats; using namespace TheISA; @@ -209,12 +206,7 @@ namespace AlphaPseudo { if (!doCheckpointInsts) return; - - - Tick when = curTick + delay * Clock::Int::ns; - Tick repeat = period * Clock::Int::ns; - - Checkpoint::setup(when, repeat); + exitSimLoop("checkpoint"); } uint64_t @@ -286,7 +278,6 @@ namespace AlphaPseudo void switchcpu(ThreadContext *tc) { - if (SampCPU) - SampCPU->switchCPUs(); + exitSimLoop("switchcpu"); } } diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 5270802d1..6a1d084b7 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Erik Hallnor + * Authors: Nathan Binkert + * Erik Hallnor * Steve Reinhardt */ @@ -44,7 +45,6 @@ #include "base/output.hh" #include "base/str.hh" #include "base/trace.hh" -#include "sim/config_node.hh" #include "sim/eventq.hh" #include "sim/param.hh" #include "sim/serialize.hh" @@ -231,8 +231,9 @@ Globals::unserialize(Checkpoint *cp) } void -Serializable::serializeAll() +Serializable::serializeAll(const std::string &cpt_dir) { + setCheckpointDir(cpt_dir); string dir = Checkpoint::dir(); if (mkdir(dir.c_str(), 0775) == -1 && errno != EEXIST) fatal("couldn't mkdir %s\n", dir); @@ -244,56 +245,42 @@ Serializable::serializeAll() globals.serialize(outstream); SimObject::serializeAll(outstream); - - assert(Serializable::ckptPrevCount + 1 == Serializable::ckptCount); - Serializable::ckptPrevCount++; - if (ckptMaxCount && ++ckptCount >= ckptMaxCount) - exitSimLoop(curTick + 1, "Maximum number of checkpoints dropped"); - } - void -Serializable::unserializeGlobals(Checkpoint *cp) +Serializable::unserializeAll(const std::string &cpt_dir) { - globals.unserialize(cp); -} - - -class SerializeEvent : public Event -{ - protected: - Tick repeat; + setCheckpointDir(cpt_dir); + string dir = Checkpoint::dir(); + string cpt_file = dir + Checkpoint::baseFilename; + string section = ""; - public: - SerializeEvent(Tick _when, Tick _repeat); - virtual void process(); - virtual void serialize(std::ostream &os) - { - panic("Cannot serialize the SerializeEvent"); - } + DPRINTFR(Config, "Loading checkpoint dir '%s'\n", + dir); + Checkpoint *cp = new Checkpoint(dir, section); + unserializeGlobals(cp); -}; - -SerializeEvent::SerializeEvent(Tick _when, Tick _repeat) - : Event(&mainEventQueue, Serialize_Pri), repeat(_repeat) -{ - setFlags(AutoDelete); - schedule(_when); + SimObject::unserializeAll(cp); } void -SerializeEvent::process() +Serializable::unserializeGlobals(Checkpoint *cp) { - Serializable::serializeAll(); - if (repeat) - schedule(curTick + repeat); + globals.unserialize(cp); } const char *Checkpoint::baseFilename = "m5.cpt"; static string checkpointDirBase; +void +setCheckpointDir(const std::string &name) +{ + checkpointDirBase = name; + if (checkpointDirBase[checkpointDirBase.size() - 1] != '/') + checkpointDirBase += "/"; +} + string Checkpoint::dir() { @@ -304,75 +291,11 @@ Checkpoint::dir() } void -Checkpoint::setup(Tick when, Tick period) +debug_serialize(const std::string &cpt_dir) { - new SerializeEvent(when, period); + Serializable::serializeAll(cpt_dir); } -class SerializeParamContext : public ParamContext -{ - private: - SerializeEvent *event; - - public: - SerializeParamContext(const string §ion); - ~SerializeParamContext(); - void checkParams(); -}; - -SerializeParamContext serialParams("serialize"); - -Param<string> serialize_dir(&serialParams, "dir", - "dir to stick checkpoint in " - "(sprintf format with cycle #)"); - -Param<Counter> serialize_cycle(&serialParams, - "cycle", - "cycle to serialize", - 0); - -Param<Counter> serialize_period(&serialParams, - "period", - "period to repeat serializations", - 0); - -Param<int> serialize_count(&serialParams, "count", - "maximum number of checkpoints to drop"); - -SerializeParamContext::SerializeParamContext(const string §ion) - : ParamContext(section), event(NULL) -{ } - -SerializeParamContext::~SerializeParamContext() -{ -} - -void -SerializeParamContext::checkParams() -{ - checkpointDirBase = simout.resolve(serialize_dir); - - // guarantee that directory ends with a '/' - if (checkpointDirBase[checkpointDirBase.size() - 1] != '/') - checkpointDirBase += "/"; - - if (serialize_cycle > 0) - Checkpoint::setup(serialize_cycle, serialize_period); - - Serializable::ckptMaxCount = serialize_count; -} - -void -debug_serialize() -{ - Serializable::serializeAll(); -} - -void -debug_serialize(Tick when) -{ - new SerializeEvent(when, 0); -} //////////////////////////////////////////////////////////////////////// // @@ -442,9 +365,8 @@ Serializable::create(Checkpoint *cp, const std::string §ion) } -Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path, - const ConfigNode *_configNode) - : db(new IniFile), basePath(path), configNode(_configNode), cptDir(cpt_dir) +Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path) + : db(new IniFile), basePath(path), cptDir(cpt_dir) { string filename = cpt_dir + "/" + Checkpoint::baseFilename; if (!db->load(filename)) { @@ -470,9 +392,6 @@ Checkpoint::findObj(const std::string §ion, const std::string &entry, if (!db->find(section, entry, path)) return false; - if ((value = configNode->resolveSimObject(path)) != NULL) - return true; - if ((value = objMap[path]) != NULL) return true; diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 1eb721cf4..880fb0785 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Erik Hallnor + * Authors: Nathan Binkert + * Erik Hallnor * Steve Reinhardt */ @@ -42,8 +43,8 @@ #include <map> #include "sim/host.hh" -#include "sim/configfile.hh" +class IniFile; class Serializable; class Checkpoint; @@ -125,7 +126,8 @@ class Serializable static int ckptCount; static int ckptMaxCount; static int ckptPrevCount; - static void serializeAll(); + static void serializeAll(const std::string &cpt_dir); + static void unserializeAll(const std::string &cpt_dir); static void unserializeGlobals(Checkpoint *cp); }; @@ -177,7 +179,7 @@ class SerializableClass // an optional config hierarchy node (specified by the third // argument). A pointer to the new SerializableBuilder is returned. typedef Serializable *(*CreateFunc)(Checkpoint *cp, - const std::string §ion); + const std::string §ion); static std::map<std::string,CreateFunc> *classMap; @@ -191,7 +193,7 @@ class SerializableClass // create Serializable given name of class and pointer to // configuration hierarchy node static Serializable *createObject(Checkpoint *cp, - const std::string §ion); + const std::string §ion); }; // @@ -203,18 +205,19 @@ class SerializableClass SerializableClass the##OBJ_CLASS##Class(CLASS_NAME, \ OBJ_CLASS::createForUnserialize); +void +setCheckpointDir(const std::string &name); + class Checkpoint { private: IniFile *db; const std::string basePath; - const ConfigNode *configNode; std::map<std::string, Serializable*> objMap; public: - Checkpoint(const std::string &cpt_dir, const std::string &path, - const ConfigNode *_configNode); + Checkpoint(const std::string &cpt_dir, const std::string &path); const std::string cptDir; @@ -238,9 +241,6 @@ class Checkpoint // Filename for base checkpoint file within directory. static const char *baseFilename; - - // Set up a checkpoint creation event or series of events. - static void setup(Tick when, Tick period = 0); }; #endif // __SERIALIZE_HH__ diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc index b7901832d..d9e8bdeaa 100644 --- a/src/sim/sim_events.cc +++ b/src/sim/sim_events.cc @@ -78,6 +78,14 @@ exitSimLoop(const std::string &message, int exit_code) exitSimLoop(curTick, message, exit_code); } +void +CountedDrainEvent::process() +{ + if (--count == 0) { + exitSimLoop("Finished drain"); + } +} + // // constructor: automatically schedules at specified time // diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 4f305ad38..3c4a9dd05 100644 --- a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -44,6 +44,11 @@ class SimLoopExitEvent : public Event int code; public: + // Default constructor. Only really used for derived classes. + SimLoopExitEvent() + : Event(&mainEventQueue, Sim_Exit_Pri) + { } + SimLoopExitEvent(Tick _when, const std::string &_cause, int c = 0) : Event(&mainEventQueue, Sim_Exit_Pri), cause(_cause), code(c) @@ -62,6 +67,22 @@ class SimLoopExitEvent : public Event virtual const char *description(); }; +class CountedDrainEvent : public SimLoopExitEvent +{ + private: + // Count of how many objects have not yet drained + int count; + public: + CountedDrainEvent() + : count(0) + { } + void process(); + + void setCount(int _count) { count = _count; } + + int getCount() { return count; } +}; + // // Event class to terminate simulation after 'n' related events have // occurred using a shared counter: used to terminate when *all* diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 117ca9325..d12b06b7a 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -37,8 +37,6 @@ #include "base/misc.hh" #include "base/trace.hh" #include "base/stats/events.hh" -#include "base/serializer.hh" -#include "sim/configfile.hh" #include "sim/host.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -74,6 +72,7 @@ SimObject::SimObject(Params *p) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); + state = Running; } // @@ -89,6 +88,7 @@ SimObject::SimObject(const string &_name) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); + state = Running; } void @@ -220,6 +220,24 @@ SimObject::serializeAll(ostream &os) } } +void +SimObject::unserializeAll(Checkpoint *cp) +{ + SimObjectList::reverse_iterator ri = simObjectList.rbegin(); + SimObjectList::reverse_iterator rend = simObjectList.rend(); + + for (; ri != rend; ++ri) { + SimObject *obj = *ri; + DPRINTFR(Config, "Unserializing '%s'\n", + obj->name()); + if(cp->sectionExists(obj->name())) + obj->unserialize(cp, obj->name()); + else + warn("Not unserializing '%s': no section found in checkpoint.\n", + obj->name()); + } +} + #ifdef DEBUG // // static function: flag which objects should have the debugger break @@ -237,7 +255,6 @@ SimObject::debugObjectBreak(const string &objs) } } -extern "C" void debugObjectBreak(const char *objs) { @@ -252,10 +269,35 @@ SimObject::recordEvent(const std::string &stat) Stats::recordEvent(stat); } +unsigned int +SimObject::drain(Event *drain_event) +{ + state = Drained; + return 0; +} + +void +SimObject::resume() +{ + state = Running; +} + +void +SimObject::setMemoryMode(State new_mode) +{ + panic("setMemoryMode() should only be called on systems"); +} + +void +SimObject::switchOut() +{ + panic("Unimplemented!"); +} + void -SimObject::drain(Serializer *serializer) +SimObject::takeOverFrom(BaseCPU *cpu) { - serializer->signalDrained(); + panic("Unimplemented!"); } DEFINE_SIM_OBJECT_CLASS_NAME("SimObject", SimObject) diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh index 84e9376a0..38f2bdd23 100644 --- a/src/sim/sim_object.hh +++ b/src/sim/sim_object.hh @@ -44,7 +44,8 @@ #include "sim/serialize.hh" #include "sim/startup.hh" -class Serializer; +class BaseCPU; +class Event; /* * Abstract superclass for simulation objects. Represents things that @@ -58,15 +59,25 @@ class SimObject : public Serializable, protected StartupCallback std::string name; }; + enum State { + Running, + Draining, + Drained + }; + private: + State state; + protected: Params *_params; + void changeState(State new_state) { state = new_state; } + public: const Params *params() const { return _params; } - private: - friend class Serializer; + State getState() { return state; } + private: typedef std::vector<SimObject *> SimObjectList; // list of all instantiated simulation objects @@ -100,13 +111,18 @@ class SimObject : public Serializable, protected StartupCallback // static: call nameOut() & serialize() on all SimObjects static void serializeAll(std::ostream &); + static void unserializeAll(Checkpoint *cp); // Methods to drain objects in order to take checkpoints // Or switch from timing -> atomic memory model - virtual void drain(Serializer *serializer); - virtual void resume() { return;} ; - virtual void serializationComplete() - { assert(0 && "Unimplemented"); }; + // Drain returns 0 if the simobject can drain immediately or + // the number of times the drain_event's process function will be called + // before the object will be done draining. Normally this should be 1 + virtual unsigned int drain(Event *drain_event); + virtual void resume(); + virtual void setMemoryMode(State new_mode); + virtual void switchOut(); + virtual void takeOverFrom(BaseCPU *cpu); #ifdef DEBUG public: diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc index f7fc03d74..041830ab7 100644 --- a/src/sim/stat_control.cc +++ b/src/sim/stat_control.cc @@ -221,8 +221,7 @@ SetupEvent(int flags, Tick when, Tick repeat) /* namespace Stats */ } -extern "C" void -debugDumpStats() +void debugDumpStats() { Stats::DumpNow(); } diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index 848b6f869..e72890612 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -27,7 +27,6 @@ * * Authors: Steve Reinhardt * Ali Saidi - * Korey Sewell */ #include <fcntl.h> @@ -92,7 +91,9 @@ SyscallReturn exitFunc(SyscallDesc *desc, int callnum, Process *process, ThreadContext *tc) { - exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff); + if (tc->exit()) { + exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff); + } return 1; } diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index f027dbf24..a3ff006ef 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -27,14 +27,13 @@ * * Authors: Steve Reinhardt * Kevin Lim - * Korey Sewell */ #ifndef __SIM_SYSCALL_EMUL_HH__ #define __SIM_SYSCALL_EMUL_HH__ -#define BSD_HOST (defined(__APPLE__) || defined(__OpenBSD__) || \ - defined(__FreeBSD__)) +#define NO_STAT64 (defined(__APPLE__) || defined(__OpenBSD__) || \ + defined(__FreeBSD__) || defined(__CYGWIN__)) /// /// @file syscall_emul.hh @@ -507,7 +506,7 @@ fstat64Func(SyscallDesc *desc, int callnum, Process *process, return -EBADF; } -#if BSD_HOST +#if NO_STAT64 struct stat hostBuf; int result = fstat(process->sim_fd(fd), &hostBuf); #else @@ -557,7 +556,7 @@ lstat64Func(SyscallDesc *desc, int callnum, Process *process, if (!tc->getMemPort()->tryReadString(path, tc->getSyscallArg(0))) return -EFAULT; -#if BSD_HOST +#if NO_STAT64 struct stat hostBuf; int result = lstat(path.c_str(), &hostBuf); #else diff --git a/src/sim/system.cc b/src/sim/system.cc index b3c7870fd..ad70b9b03 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -63,7 +63,7 @@ System::System(Params *p) #else page_ptr(0), #endif - _params(p) + memoryMode(p->mem_mode), _params(p) { // add self to global system list systemList.push_back(this); @@ -119,8 +119,6 @@ System::System(Params *p) DPRINTF(Loader, "Kernel end = %#x\n", kernelEnd); DPRINTF(Loader, "Kernel entry = %#x\n", kernelEntry); DPRINTF(Loader, "Kernel loaded...\n"); - - kernelBinning = new Kernel::Binning(this); #endif // FULL_SYSTEM // increment the number of running systms @@ -145,6 +143,14 @@ int rgdb_wait = -1; #endif // FULL_SYSTEM + +void +System::setMemoryMode(MemoryMode mode) +{ + assert(getState() == Drained); + memoryMode = mode; +} + int System::registerThreadContext(ThreadContext *tc, int id) { @@ -245,13 +251,15 @@ System::printSystems() } } -extern "C" void printSystems() { System::printSystems(); } +const char *System::MemoryModeStrings[3] = {"invalid", "atomic", + "timing"}; + #if FULL_SYSTEM // In full system mode, only derived classes (e.g. AlphaLinuxSystem) @@ -264,12 +272,15 @@ DEFINE_SIM_OBJECT_CLASS_NAME("System", System) BEGIN_DECLARE_SIM_OBJECT_PARAMS(System) SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; END_DECLARE_SIM_OBJECT_PARAMS(System) BEGIN_INIT_SIM_OBJECT_PARAMS(System) - INIT_PARAM(physmem, "physical memory") + INIT_PARAM(physmem, "physical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings) END_INIT_SIM_OBJECT_PARAMS(System) @@ -278,6 +289,7 @@ CREATE_SIM_OBJECT(System) System::Params *p = new System::Params; p->name = getInstanceName(); p->physmem = physmem; + p->mem_mode = mem_mode; return new System(p); } diff --git a/src/sim/system.hh b/src/sim/system.hh index 059dc92dc..a1b53c2eb 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -61,6 +61,23 @@ class RemoteGDB; class System : public SimObject { public: + enum MemoryMode { + Invalid=0, + Atomic, + Timing + }; + + static const char *MemoryModeStrings[3]; + + + MemoryMode getMemoryMode() { assert(memoryMode); return memoryMode; } + + /** Change the memory mode of the system. This should only be called by the + * python!! + * @param mode Mode to change to (atomic/timing) + */ + void setMemoryMode(MemoryMode mode); + PhysicalMemory *physmem; PCEventQueue pcEventQueue; @@ -108,6 +125,8 @@ class System : public SimObject protected: + MemoryMode memoryMode; + #if FULL_SYSTEM /** * Fix up an address used to match PCs for hooking simulator @@ -153,6 +172,7 @@ class System : public SimObject { std::string name; PhysicalMemory *physmem; + MemoryMode mem_mode; #if FULL_SYSTEM Tick boot_cpu_frequency; diff --git a/util/rundiff b/util/rundiff index 533f448b1..c34bb53a3 100755 --- a/util/rundiff +++ b/util/rundiff @@ -39,7 +39,7 @@ # "filename" is a pipe (|). Thus to compare the instruction traces # from two versions of m5 (m5a and m5b), you can do this: # -# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |' +# rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |' # use strict; diff --git a/util/tracediff b/util/tracediff index f11431293..a7efc260d 100755 --- a/util/tracediff +++ b/util/tracediff @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright (c) 2003-2005 The Regents of The University of Michigan +# Copyright (c) 2003-2006 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,7 +36,7 @@ # If you want to pass different arguments to the two instances of m5, # you can embed them in the simulator arguments like this: # -# % tracediff "m5.opt --foo.bar=1" "m5.opt --foo.bar=2" [common args] +# % tracediff "m5.opt --option1" "m5.opt --option2" [common args] # if (@ARGV < 2) { |