diff options
author | Ali Saidi <saidi@eecs.umich.edu> | 2006-07-19 17:59:04 -0400 |
---|---|---|
committer | Ali Saidi <saidi@eecs.umich.edu> | 2006-07-19 17:59:04 -0400 |
commit | 15e5ce96c76a20b9b27e5f7d7ed29e962422ad1a (patch) | |
tree | 610a888b2de5d25c166f65819d8ad3058d13723c | |
parent | b36796914af8bfc6729cc8a519e57572460e43e8 (diff) | |
parent | 6175f712b3216f3e5387b07d9c41c1931c09acd9 (diff) | |
download | gem5-15e5ce96c76a20b9b27e5f7d7ed29e962422ad1a.tar.xz |
Merge zizzer:/bk/newmem
into zeep.pool:/z/saidi/work/m5.newmem.head
--HG--
extra : convert_revision : 8c747208d72ffbb0160a2ad4a75383420debdf83
148 files changed, 6395 insertions, 1900 deletions
diff --git a/SConstruct b/SConstruct index b18fe66d3..259b6c583 100644 --- a/SConstruct +++ b/SConstruct @@ -494,10 +494,10 @@ for build_path in build_paths: exports = 'env') # Set up the regression tests for each build. -# for e in envList: -# SConscript('m5-test/SConscript', -# build_dir = os.path.join(build_dir, 'test', e.Label), -# exports = { 'env' : e }, duplicate = False) + for e in envList: + SConscript('tests/SConscript', + build_dir = os.path.join(build_path, 'test', e.Label), + exports = { 'env' : e }, duplicate = False) Help(help_text) diff --git a/configs/test/SysPaths.py b/configs/test/SysPaths.py index e458d5225..3f96a546f 100644 --- a/configs/test/SysPaths.py +++ b/configs/test/SysPaths.py @@ -2,41 +2,39 @@ import os, sys from os.path import isdir, join as joinpath from os import environ as env -systemdir = None -bindir = None -diskdir = None -scriptdir = None +def disk(file): + system() + return joinpath(disk.dir, file) + +def binary(file): + system() + return joinpath(binary.dir, file) -def load_defaults(): - global systemdir, bindir, diskdir, scriptdir - if not systemdir: +def script(file): + system() + return joinpath(script.dir, file) + +def system(): + if not system.dir: try: path = env['M5_PATH'].split(':') except KeyError: path = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ] - for systemdir in path: - if os.path.isdir(systemdir): + for system.dir in path: + if os.path.isdir(system.dir): break else: raise ImportError, "Can't find a path to system files." - if not bindir: - bindir = joinpath(systemdir, 'binaries') - if not diskdir: - diskdir = joinpath(systemdir, 'disks') - if not scriptdir: - scriptdir = joinpath(systemdir, 'boot') - -def disk(file): - load_defaults() - return joinpath(diskdir, file) - -def binary(file): - load_defaults() - return joinpath(bindir, file) - -def script(file): - load_defaults() - return joinpath(scriptdir, file) - + if not binary.dir: + binary.dir = joinpath(system.dir, 'binaries') + if not disk.dir: + disk.dir = joinpath(system.dir, 'disks') + if not script.dir: + script.dir = joinpath(system.dir, 'boot') + +system.dir = None +binary.dir = None +disk.dir = None +script.dir = None diff --git a/configs/test/fs.py b/configs/test/fs.py index aa530dd55..f4c50fc23 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -1,14 +1,19 @@ +import optparse, os, sys + import m5 from m5.objects import * -import os from SysPaths import * +from FullO3Config import * -parser = optparse.OptionParser(option_list=m5.standardOptions) +parser = optparse.OptionParser() +parser.add_option("-d", "--detailed", action="store_true") parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("--dual", help="Run full system using dual systems", + action="store_true") (options, args) = parser.parse_args() -m5.setStandardOptions(options) if args: print "Error: script doesn't take any positional arguments" @@ -17,192 +22,69 @@ if args: # Base for tests is directory containing this file. test_base = os.path.dirname(__file__) +script.dir = '/z/saidi/work/m5.newmem/configs/boot' + linux_image = env.get('LINUX_IMAGE', disk('linux-latest.img')) -class IdeControllerPciData(PciConfigData): - VendorID = 0x8086 - DeviceID = 0x7111 - Command = 0x0 - Status = 0x280 - Revision = 0x0 - ClassCode = 0x01 - SubClassCode = 0x01 - ProgIF = 0x85 - BAR0 = 0x00000001 - BAR1 = 0x00000001 - BAR2 = 0x00000001 - BAR3 = 0x00000001 - BAR4 = 0x00000001 - BAR5 = 0x00000001 - InterruptLine = 0x1f - InterruptPin = 0x01 - BAR0Size = '8B' - BAR1Size = '4B' - BAR2Size = '8B' - BAR3Size = '4B' - BAR4Size = '16B' - -class SinicPciData(PciConfigData): - VendorID = 0x1291 - DeviceID = 0x1293 - Status = 0x0290 - SubClassCode = 0x00 - ClassCode = 0x02 - ProgIF = 0x00 - BAR0 = 0x00000000 - BAR1 = 0x00000000 - BAR2 = 0x00000000 - BAR3 = 0x00000000 - BAR4 = 0x00000000 - BAR5 = 0x00000000 - MaximumLatency = 0x34 - MinimumGrant = 0xb0 - InterruptLine = 0x1e - InterruptPin = 0x01 - BAR0Size = '64kB' - -class NSGigEPciData(PciConfigData): - VendorID = 0x100B - DeviceID = 0x0022 - Status = 0x0290 - SubClassCode = 0x00 - ClassCode = 0x02 - ProgIF = 0x00 - BAR0 = 0x00000001 - BAR1 = 0x00000000 - BAR2 = 0x00000000 - BAR3 = 0x00000000 - BAR4 = 0x00000000 - BAR5 = 0x00000000 - MaximumLatency = 0x34 - MinimumGrant = 0xb0 - InterruptLine = 0x1e - InterruptPin = 0x01 - BAR0Size = '256B' - BAR1Size = '4kB' - -class LinuxRootDisk(IdeDisk): - raw_image = RawDiskImage(image_file=linux_image, read_only=True) - image = CowDiskImage(child=Parent.raw_image, read_only=False) - -class LinuxSwapDisk(IdeDisk): - raw_image = RawDiskImage(image_file = disk('linux-bigswap2.img'), - read_only=True) - image = CowDiskImage(child = Parent.raw_image, read_only=False) - -class SpecwebFilesetDisk(IdeDisk): - raw_image = RawDiskImage(image_file = disk('specweb-fileset.img'), - read_only=True) - image = CowDiskImage(child = Parent.raw_image, read_only=False) +class CowIdeDisk(IdeDisk): + image = CowDiskImage(child=RawDiskImage(read_only=True), + read_only=False) + + def childImage(self, ci): + self.image.child.image_file = ci class BaseTsunami(Tsunami): - cchip = TsunamiCChip(pio_addr=0x801a0000000) - pchip = TsunamiPChip(pio_addr=0x80180000000) - pciconfig = PciConfigAll(pio_addr=0x801fe000000) - fake_sm_chip = IsaFake(pio_addr=0x801fc000370) - - fake_uart1 = IsaFake(pio_addr=0x801fc0002f8) - fake_uart2 = IsaFake(pio_addr=0x801fc0003e8) - fake_uart3 = IsaFake(pio_addr=0x801fc0002e8) - fake_uart4 = IsaFake(pio_addr=0x801fc0003f0) - - fake_ppc = IsaFake(pio_addr=0x801fc0003bc) - - fake_OROM = IsaFake(pio_addr=0x800000a0000, pio_size=0x60000) - - fake_pnp_addr = IsaFake(pio_addr=0x801fc000279) - fake_pnp_write = IsaFake(pio_addr=0x801fc000a79) - fake_pnp_read0 = IsaFake(pio_addr=0x801fc000203) - fake_pnp_read1 = IsaFake(pio_addr=0x801fc000243) - fake_pnp_read2 = IsaFake(pio_addr=0x801fc000283) - fake_pnp_read3 = IsaFake(pio_addr=0x801fc0002c3) - fake_pnp_read4 = IsaFake(pio_addr=0x801fc000303) - fake_pnp_read5 = IsaFake(pio_addr=0x801fc000343) - fake_pnp_read6 = IsaFake(pio_addr=0x801fc000383) - fake_pnp_read7 = IsaFake(pio_addr=0x801fc0003c3) - - fake_ata0 = IsaFake(pio_addr=0x801fc0001f0) - fake_ata1 = IsaFake(pio_addr=0x801fc000170) - - fb = BadDevice(pio_addr=0x801fc0003d0, devicename='FrameBuffer') - io = TsunamiIO(pio_addr=0x801fc000000) - uart = Uart8250(pio_addr=0x801fc0003f8) ethernet = NSGigE(configdata=NSGigEPciData(), pci_bus=0, pci_dev=1, pci_func=0) etherint = NSGigEInt(device=Parent.ethernet) - console = AlphaConsole(pio_addr=0x80200000000, disk=Parent.simple_disk) - -class LinuxTsunami(BaseTsunami): - disk0 = LinuxRootDisk(driveID='master') - disk1 = SpecwebFilesetDisk(driveID='slave') - disk2 = LinuxSwapDisk(driveID='master') - ide = IdeController(disks=[Parent.disk0, Parent.disk1, Parent.disk2], - configdata=IdeControllerPciData(), + ide = IdeController(disks=[Parent.disk0, Parent.disk2], pci_func=0, pci_dev=0, pci_bus=0) class MyLinuxAlphaSystem(LinuxAlphaSystem): - magicbus = Bus(bus_id=0) - magicbus2 = Bus(bus_id=1) + iobus = Bus(bus_id=0) + membus = Bus(bus_id=1) bridge = Bridge() physmem = PhysicalMemory(range = AddrRange('128MB')) - bridge.side_a = magicbus.port - bridge.side_b = magicbus2.port - physmem.port = magicbus2.port - tsunami = LinuxTsunami() - tsunami.cchip.pio = magicbus.port - tsunami.pchip.pio = magicbus.port - tsunami.pciconfig.pio = magicbus.port - tsunami.fake_sm_chip.pio = magicbus.port - tsunami.ethernet.pio = magicbus.port - tsunami.ethernet.dma = magicbus.port - tsunami.fake_uart1.pio = magicbus.port - tsunami.fake_uart2.pio = magicbus.port - tsunami.fake_uart3.pio = magicbus.port - tsunami.fake_uart4.pio = magicbus.port - tsunami.ide.pio = magicbus.port - tsunami.ide.dma = magicbus.port - tsunami.fake_ppc.pio = magicbus.port - tsunami.fake_OROM.pio = magicbus.port - tsunami.fake_pnp_addr.pio = magicbus.port - tsunami.fake_pnp_write.pio = magicbus.port - tsunami.fake_pnp_read0.pio = magicbus.port - tsunami.fake_pnp_read1.pio = magicbus.port - tsunami.fake_pnp_read2.pio = magicbus.port - tsunami.fake_pnp_read3.pio = magicbus.port - tsunami.fake_pnp_read4.pio = magicbus.port - tsunami.fake_pnp_read5.pio = magicbus.port - tsunami.fake_pnp_read6.pio = magicbus.port - tsunami.fake_pnp_read7.pio = magicbus.port - tsunami.fake_ata0.pio = magicbus.port - tsunami.fake_ata1.pio = magicbus.port - tsunami.fb.pio = magicbus.port - tsunami.io.pio = magicbus.port - tsunami.uart.pio = magicbus.port - tsunami.console.pio = magicbus.port - raw_image = RawDiskImage(image_file=disk('linux-latest.img'), - read_only=True) - simple_disk = SimpleDisk(disk=Parent.raw_image) + bridge.side_a = iobus.port + bridge.side_b = membus.port + physmem.port = membus.port + disk0 = CowIdeDisk(driveID='master') + disk2 = CowIdeDisk(driveID='master') + disk0.childImage(linux_image) + disk2.childImage(disk('linux-bigswap2.img')) + tsunami = BaseTsunami() + tsunami.attachIO(iobus) + tsunami.ide.pio = iobus.port + tsunami.ide.dma = iobus.port + tsunami.ide.config = iobus.port + tsunami.ethernet.pio = iobus.port + tsunami.ethernet.dma = iobus.port + tsunami.ethernet.config = iobus.port + simple_disk = SimpleDisk(disk=RawDiskImage(image_file = linux_image, + read_only = True)) intrctrl = IntrControl() - if options.timing: + if options.detailed: + cpu = DetailedO3CPU() + elif options.timing: cpu = TimingSimpleCPU() + mem_mode = 'timing' else: cpu = AtomicSimpleCPU() - cpu.mem = magicbus2 + cpu.mem = membus + cpu.icache_port = membus.port + cpu.dcache_port = membus.port cpu.itb = AlphaITB() cpu.dtb = AlphaDTB() + cpu.clock = '2GHz' sim_console = SimConsole(listener=ConsoleListener(port=3456)) kernel = binary('vmlinux') pal = binary('ts_osfpal') console = binary('console') boot_osflags = 'root=/dev/hda1 console=ttyS0' -# readfile = os.path.join(test_base, 'halt.sh') - - -class TsunamiRoot(System): +class TsunamiRoot(Root): pass - def DualRoot(clientSystem, serverSystem): self = Root() self.client = clientSystem @@ -212,15 +94,29 @@ def DualRoot(clientSystem, serverSystem): self.etherlink = EtherLink(int1 = Parent.client.tsunami.etherint[0], int2 = Parent.server.tsunami.etherint[0], dump = Parent.etherdump) - self.clock = '5GHz' + self.clock = '1THz' return self -root = DualRoot( - MyLinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')), - MyLinuxAlphaSystem(readfile=script('netperf-server.rcS'))) +if options.dual: + root = DualRoot( + MyLinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')), + MyLinuxAlphaSystem(readfile=script('netperf-server.rcS'))) +else: + root = TsunamiRoot(clock = '2GHz', system = MyLinuxAlphaSystem()) m5.instantiate(root) -exit_event = m5.simulate() +#exit_event = m5.simulate(2600000000000) +#if exit_event.getCause() != "user interrupt received": +# m5.checkpoint(root, 'cpt') +# exit_event = m5.simulate(300000000000) +# if exit_event.getCause() != "user interrupt received": +# m5.checkpoint(root, 'cptA') + + +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate() print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() diff --git a/configs/test/test.py b/configs/test/test.py index 625304a08..3b637f70f 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -1,38 +1,14 @@ # Simple test script # # Alpha: "m5 test.py" -# MIPS: "m5 test.py -a Mips -c hello_mips" +# MIPS: "m5 test.py -c hello_mips" -import os, optparse, sys import m5 +import os, optparse, sys +m5.AddToPath('../common') +from SEConfig import * from m5.objects import * -from FullO3Config import * - -# parse command-line arguments -parser = optparse.OptionParser(option_list=m5.standardOptions) - -parser.add_option("-c", "--cmd", default="hello", - help="The binary to run in syscall emulation mode.") -parser.add_option("-o", "--options", default="", - help="The options to pass to the binary, use \" \" around the entire\ - string.") -parser.add_option("-i", "--input", default="", - help="A file of input to give to the binary.") -parser.add_option("-t", "--timing", action="store_true", - help="Use simple timing CPU.") -parser.add_option("-d", "--detailed", action="store_true", - help="Use detailed CPU.") -parser.add_option("-m", "--maxtick", type="int", - help="Set the maximum number of ticks to run for") - -(options, args) = parser.parse_args() -m5.setStandardOptions(options) -if args: - print "Error: script doesn't take any positional arguments" - sys.exit(1) - -# build configuration this_dir = os.path.dirname(__file__) process = LiveProcess() @@ -41,16 +17,7 @@ process.cmd = options.cmd + " " + options.options if options.input != "": process.input = options.input -magicbus = Bus() -mem = PhysicalMemory() - -if options.timing and options.detailed: - print "Error: you may only specify one cpu model"; - sys.exit(1) - -if options.timing: - cpu = TimingSimpleCPU() -elif options.detailed: +if options.detailed: #check for SMT workload workloads = options.cmd.split(';') if len(workloads) > 1: @@ -70,15 +37,10 @@ elif options.detailed: process += [smt_process, ] smt_idx += 1 - cpu = DetailedO3CPU() -else: - cpu = AtomicSimpleCPU() -cpu.workload = process -cpu.mem = magicbus +root = MySESystem(process) -system = System(physmem = mem, cpu = cpu) -mem.port = magicbus.port -root = Root(system = system) +if options.timing or options.detailed: + root.system.mem_mode = 'timing' # instantiate configuration m5.instantiate(root) diff --git a/src/SConscript b/src/SConscript index 0d0cb2486..10faf5aaf 100644 --- a/src/SConscript +++ b/src/SConscript @@ -89,7 +89,6 @@ base_sources = Split(''' cpu/pc_event.cc cpu/quiesce_event.cc cpu/static_inst.cc - cpu/sampler/sampler.cc cpu/simple_thread.cc cpu/thread_state.cc @@ -299,7 +298,7 @@ alpha_eio_sources = Split(''' encumbered/eio/eio.cc ''') -if env['TARGET_ISA'] == 'ALPHA_ISA': +if env['TARGET_ISA'] == 'alpha': syscall_emulation_sources += alpha_eio_sources memtest_sources = Split(''' diff --git a/src/arch/SConscript b/src/arch/SConscript index bc517341a..0a5962889 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -140,8 +140,15 @@ def isa_desc_emitter(target, source, env): # Pieces are in place, so create the builder. python = sys.executable # use same Python binary used to run scons -isa_desc_builder = Builder(action=python + ' $SOURCES $TARGET.dir $CPU_MODELS', - emitter = isa_desc_emitter) + +# Also include the CheckerCPU as one of the models if it is being +# enabled via command line. +if env['USE_CHECKER']: + isa_desc_builder = Builder(action=python + ' $SOURCES $TARGET.dir $CPU_MODELS CheckerCPU', + emitter = isa_desc_emitter) +else: + isa_desc_builder = Builder(action=python + ' $SOURCES $TARGET.dir $CPU_MODELS', + emitter = isa_desc_emitter) env.Append(BUILDERS = { 'ISADesc' : isa_desc_builder }) diff --git a/src/arch/alpha/freebsd/system.cc b/src/arch/alpha/freebsd/system.cc index 7cf68e0db..8d50e1612 100644 --- a/src/arch/alpha/freebsd/system.cc +++ b/src/arch/alpha/freebsd/system.cc @@ -97,6 +97,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<string> kernel; Param<string> console; @@ -115,6 +116,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -133,6 +136,7 @@ CREATE_SIM_OBJECT(FreebsdAlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/linux/system.cc b/src/arch/alpha/linux/system.cc index 9fe63c390..ef4e18cb5 100644 --- a/src/arch/alpha/linux/system.cc +++ b/src/arch/alpha/linux/system.cc @@ -191,6 +191,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<string> kernel; Param<string> console; @@ -209,6 +210,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -227,6 +230,7 @@ CREATE_SIM_OBJECT(LinuxAlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/regfile.hh b/src/arch/alpha/regfile.hh index 1025412cd..9ecad6f42 100644 --- a/src/arch/alpha/regfile.hh +++ b/src/arch/alpha/regfile.hh @@ -112,6 +112,10 @@ namespace AlphaISA lock_flag = 0; lock_addr = 0; } + + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string §ion); #if FULL_SYSTEM protected: typedef uint64_t InternalProcReg; diff --git a/src/arch/alpha/system.cc b/src/arch/alpha/system.cc index dce7365aa..a7e615531 100644 --- a/src/arch/alpha/system.cc +++ b/src/arch/alpha/system.cc @@ -221,6 +221,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<std::string> kernel; Param<std::string> console; @@ -239,6 +240,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -257,6 +260,7 @@ CREATE_SIM_OBJECT(AlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/tru64/system.cc b/src/arch/alpha/tru64/system.cc index 6c0edc1ee..3ef1e4d3c 100644 --- a/src/arch/alpha/tru64/system.cc +++ b/src/arch/alpha/tru64/system.cc @@ -95,6 +95,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem) Param<Tick> boot_cpu_frequency; SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<string> kernel; Param<string> console; @@ -113,6 +114,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem) INIT_PARAM(boot_cpu_frequency, "frequency of the boot cpu"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -131,6 +134,7 @@ CREATE_SIM_OBJECT(Tru64AlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/mips/isa/formats/fp.isa b/src/arch/mips/isa/formats/fp.isa index 1e5d62626..cdb892b3f 100644 --- a/src/arch/mips/isa/formats/fp.isa +++ b/src/arch/mips/isa/formats/fp.isa @@ -142,10 +142,10 @@ output exec {{ cpu->setFloatRegBits(inst, 0, mips_nan, size); //Read FCSR from FloatRegFile - uint32_t fcsr_bits = cpu->tc->readFloatRegBits(FCSR); + uint32_t fcsr_bits = cpu->tcBase()->readFloatRegBits(FCSR); //Write FCSR from FloatRegFile - cpu->tc->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits)); + cpu->tcBase()->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits)); if (traceData) { traceData->setData(mips_nan); } return true; @@ -158,12 +158,12 @@ output exec {{ fpResetCauseBits(%(CPU_exec_context)s *cpu) { //Read FCSR from FloatRegFile - uint32_t fcsr = cpu->tc->readFloatRegBits(FCSR); + uint32_t fcsr = cpu->tcBase()->readFloatRegBits(FCSR); fcsr = bits(fcsr, 31, 18) << 18 | bits(fcsr, 11, 0); //Write FCSR from FloatRegFile - cpu->tc->setFloatRegBits(FCSR, fcsr); + cpu->tcBase()->setFloatRegBits(FCSR, fcsr); } }}; @@ -176,8 +176,9 @@ def template FloatingPointExecute {{ //When is the right time to reset cause bits? //start of every instruction or every cycle? +#if FULL_SYSTEM fpResetCauseBits(xc); - +#endif %(op_decl)s; %(op_rd)s; @@ -192,7 +193,10 @@ def template FloatingPointExecute {{ //---- //Check for IEEE 754 FP Exceptions //fault = fpNanOperands((FPOp*)this, xc, Fd, traceData); - if (!fpInvalidOp((FPOp*)this, xc, Fd, traceData) && + if ( +#if FULL_SYSTEM + !fpInvalidOp((FPOp*)this, xc, Fd, traceData) && +#endif fault == NoFault) { %(op_wb)s; diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc index 85acc4e8c..a8b41270e 100644 --- a/src/arch/mips/isa_traits.cc +++ b/src/arch/mips/isa_traits.cc @@ -46,6 +46,12 @@ MipsISA::copyRegs(ThreadContext *src, ThreadContext *dest) } void +MipsISA::copyMiscRegs(ThreadContext *src, ThreadContext *dest) +{ + panic("Copy Misc. Regs Not Implemented Yet\n"); +} + +void MipsISA::MiscRegFile::copyMiscRegs(ThreadContext *tc) { panic("Copy Misc. Regs Not Implemented Yet\n"); diff --git a/src/arch/mips/isa_traits.hh b/src/arch/mips/isa_traits.hh index ff994bef9..2f485c7fd 100644 --- a/src/arch/mips/isa_traits.hh +++ b/src/arch/mips/isa_traits.hh @@ -129,7 +129,7 @@ namespace MipsISA template <class TC> void zeroRegisters(TC *tc); - const Addr MaxAddr = (Addr)-1; +// const Addr MaxAddr = (Addr)-1; void copyRegs(ThreadContext *src, ThreadContext *dest); diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index 031c2030e..cb847fe04 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -41,6 +41,8 @@ using namespace std; using namespace MipsISA; +Addr MipsLiveProcess::stack_start = 0x7FFFFFFF; + MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, System *_system, int stdin_fd, int stdout_fd, int stderr_fd, std::vector<std::string> &argv, std::vector<std::string> &envp) @@ -49,10 +51,11 @@ MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, { // Set up stack. On MIPS, stack starts at the top of kuseg // user address space. MIPS stack grows down from here - stack_base = 0x7FFFFFFF; + stack_base = stack_start; // Set pointer for next thread stack. Reserve 8M for main stack. next_thread_stack_base = stack_base - (8 * 1024 * 1024); + stack_start = next_thread_stack_base; // Set up break point (Top of Heap) brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index b0ef20399..4baee134b 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -50,6 +50,9 @@ class MipsLiveProcess : public LiveProcess std::vector<std::string> &envp); void startup(); + + + static Addr stack_start; }; diff --git a/src/arch/sparc/system.cc b/src/arch/sparc/system.cc index e197e7918..63cbbe057 100644 --- a/src/arch/sparc/system.cc +++ b/src/arch/sparc/system.cc @@ -141,6 +141,7 @@ SparcSystem::unserialize(Checkpoint *cp, const std::string §ion) BEGIN_DECLARE_SIM_OBJECT_PARAMS(SparcSystem) SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; Param<std::string> kernel; Param<std::string> reset_bin; @@ -161,6 +162,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SparcSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(reset_bin, "file that contains the reset code"), INIT_PARAM(hypervisor_bin, "file that contains the hypervisor code"), @@ -183,6 +186,7 @@ CREATE_SIM_OBJECT(SparcSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->reset_bin = reset_bin; p->hypervisor_bin = hypervisor_bin; diff --git a/src/base/timebuf.hh b/src/base/timebuf.hh index 160a97034..a484a3179 100644 --- a/src/base/timebuf.hh +++ b/src/base/timebuf.hh @@ -215,6 +215,11 @@ class TimeBuffer { return wire(this, 0); } + + int getSize() + { + return size; + } }; #endif // __BASE_TIMEBUF_HH__ diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 327ce6075..27c24107c 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -48,8 +48,10 @@ ccfilename = sys.argv[1] + '.cc' # To define a new flag, simply add it to this list. # baseFlags = [ + 'Activity', 'AlphaConsole', 'BADADDR', + 'BE', 'BPredRAS', 'Bus', 'BusAddrRanges', @@ -84,6 +86,7 @@ baseFlags = [ 'EthernetPIO', 'EthernetSM', 'Event', + 'FE', 'Fault', 'Fetch', 'Flow', @@ -97,6 +100,7 @@ baseFlags = [ 'GDBSend', 'GDBWrite', 'HWPrefetch', + 'IBE', 'IEW', 'IIC', 'IICMore', @@ -115,13 +119,8 @@ baseFlags = [ 'MSHR', 'Mbox', 'MemDepUnit', - 'BaseCPU' 'O3CPU', 'OzoneCPU', - 'FE', - 'IBE', - 'BE', - 'O3CPU', 'OzoneLSQ', 'PCEvent', 'PCIA', @@ -135,6 +134,7 @@ baseFlags = [ 'RenameMap', 'SQL', 'Sampler', + 'Scoreboard', 'ScsiCtrl', 'ScsiDisk', 'ScsiNone', @@ -158,8 +158,6 @@ baseFlags = [ 'Uart', 'VtoPhys', 'WriteBarrier', - 'Activity', - 'Scoreboard', 'Writeback', ] @@ -178,7 +176,7 @@ compoundFlagMap = { 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'O3CPU', 'Activity','Scoreboard','Writeback'], + 'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'O3CPU', 'Activity','Scoreboard','Writeback'], 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'] } diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 3dcc2f1ec..7d45c7870 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -71,7 +71,8 @@ virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) c # Generate a temporary CPU list, including the CheckerCPU if # it's enabled. This isn't used for anything else other than StaticInst # headers. -temp_cpu_list = env['CPU_MODELS'] +temp_cpu_list = env['CPU_MODELS'][:] + if env['USE_CHECKER']: temp_cpu_list.append('CheckerCPU') @@ -113,6 +114,9 @@ CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU'] # ################################################################# +# Keep a list of CPU models that support SMT +env['SMT_CPU_MODELS'] = [] + sources = [] need_simple_base = False @@ -130,13 +134,13 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') +need_bp_unit = False if 'O3CPU' in env['CPU_MODELS']: + need_bp_unit = True sources += SConscript('o3/SConscript', exports = 'env') sources += Split(''' - o3/2bit_local_pred.cc o3/base_dyn_inst.cc o3/bpred_unit.cc - o3/btb.cc o3/commit.cc o3/decode.cc o3/fetch.cc @@ -148,18 +152,18 @@ if 'O3CPU' in env['CPU_MODELS']: o3/lsq_unit.cc o3/lsq.cc o3/mem_dep_unit.cc - o3/ras.cc o3/rename.cc o3/rename_map.cc o3/rob.cc o3/scoreboard.cc o3/store_set.cc - o3/tournament_pred.cc ''') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') + env['SMT_CPU_MODELS'].append('O3CPU') if 'OzoneCPU' in env['CPU_MODELS']: + need_bp_unit = True sources += Split(''' ozone/base_dyn_inst.cc ozone/bpred_unit.cc @@ -174,6 +178,14 @@ if 'OzoneCPU' in env['CPU_MODELS']: if env['USE_CHECKER']: sources += Split('ozone/checker_builder.cc') +if need_bp_unit: + sources += Split(''' + o3/2bit_local_pred.cc + o3/btb.cc + o3/ras.cc + o3/tournament_pred.cc + ''') + if env['USE_CHECKER']: sources += Split('checker/cpu.cc') checker_supports = False @@ -181,9 +193,10 @@ if env['USE_CHECKER']: if i in env['CPU_MODELS']: checker_supports = True if not checker_supports: - print "Checker only supports CPU models %s, please " \ - "set USE_CHECKER=False or use one of those CPU models" \ - % CheckerSupportedCPUList + print "Checker only supports CPU models", + for i in CheckerSupportedCPUList: + print i, + print ", please set USE_CHECKER=False or use one of those CPU models" Exit(1) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 40cec416b..ce440aeff 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,7 +41,6 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" @@ -60,11 +59,11 @@ int maxThreadsPerCPU = 1; #if FULL_SYSTEM BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), checkInterrupts(true), + : MemObject(p->name), clock(p->clock), checkInterrupts(true), params(p), number_of_threads(p->numberOfThreads), system(p->system) #else BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), params(p), + : MemObject(p->name), clock(p->clock), params(p), number_of_threads(p->numberOfThreads), system(p->system) #endif { diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 51f3bb905..2be6e4e81 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -36,17 +36,17 @@ #include "base/statistics.hh" #include "config/full_system.hh" -#include "cpu/sampler/sampler.hh" #include "sim/eventq.hh" -#include "sim/sim_object.hh" +#include "mem/mem_object.hh" #include "arch/isa_traits.hh" class BranchPred; class CheckerCPU; class ThreadContext; class System; +class Port; -class BaseCPU : public SimObject +class BaseCPU : public MemObject { protected: // CPU's clock period in terms of the number of ticks of curTime. diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index 785387e60..6d6ae1e0a 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -66,7 +66,6 @@ class ThreadContext; class MemInterface; class Checkpoint; class Request; -class Sampler; /** * CheckerCPU class. Dynamically verifies instructions as they are @@ -128,6 +127,12 @@ class CheckerCPU : public BaseCPU Port *dcachePort; + virtual Port *getPort(const std::string &name, int idx) + { + panic("Not supported on checker!"); + return NULL; + } + public: // Primary thread being run. SimpleThread *thread; @@ -165,7 +170,7 @@ class CheckerCPU : public BaseCPU virtual Counter totalInstructions() const { - return numInst - startNumInst; + return 0; } // number of simulated loads @@ -374,7 +379,7 @@ class Checker : public CheckerCPU : CheckerCPU(p) { } - void switchOut(Sampler *s); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); void verify(DynInstPtr &inst); diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 137e1c46d..81f97726c 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -236,9 +236,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) willChangePC = true; newPC = thread->readPC(); DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); -#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC 0x%08p", fault, thread->readPC()); -#endif // FULL_SYSTEM +#endif } else { #if THE_ISA != MIPS_ISA // go to the next instruction @@ -295,7 +293,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) template <class DynInstPtr> void -Checker<DynInstPtr>::switchOut(Sampler *s) +Checker<DynInstPtr>::switchOut() { instList.clear(); } diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index c0ac8f01d..c035e92ac 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -120,7 +120,7 @@ class CheckerThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(delay); } /// Set the status to Halted. void halt() { actualTC->halt(); } diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index 1add32745..5b0c6c4da 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -79,18 +79,6 @@ CpuModel('OzoneCPU', 'ozone_exec.cc', CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) - -# Maybe there is a more clever way to determine ISA -# here but since the environment variable isnt passed through -# here the easiest way is this... -sub_template = 'not found' -for argument in sys.argv: - if 'ALPHA' in argument: - sub_template = 'AlphaDynInst<AlphaSimpleImpl>' - -if sub_template == 'not found': - sys.exit('NO CPU_exec_context substitution defined for this ISA') - CpuModel('O3CPU', 'o3_cpu_exec.cc', '#include "cpu/o3/isa_specific.hh"', - { 'CPU_exec_context': sub_template }) + { 'CPU_exec_context': 'O3DynInst' }) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 490305cbf..5e767655d 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -91,7 +91,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -99,7 +102,9 @@ Param<unsigned> renameToROBDelay; Param<unsigned> commitWidth; Param<unsigned> squashWidth; Param<Tick> trapLatency; -Param<Tick> fetchTrapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; Param<std::string> predType; Param<unsigned> localPredictorSize; @@ -207,7 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -216,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), - INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), @@ -333,7 +343,10 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -341,7 +354,9 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; params->trapLatency = trapLatency; - params->fetchTrapLatency = fetchTrapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; params->predType = predType; params->localPredictorSize = localPredictorSize; diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh index 8cd8692c6..b928ae654 100644 --- a/src/cpu/o3/alpha/impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -36,6 +36,7 @@ #include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template <class Impl> class AlphaDynInst; @@ -88,7 +89,4 @@ struct AlphaSimpleImpl /** The O3Impl to be used. */ typedef AlphaSimpleImpl O3CPUImpl; -/** The O3Impl to be used. */ -typedef DynInst O3DynInst; - #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh index b1f2a487d..c618cee08 100644 --- a/src/cpu/o3/alpha/params.hh +++ b/src/cpu/o3/alpha/params.hh @@ -54,16 +54,7 @@ class AlphaSimpleParams : public O3Params #if FULL_SYSTEM AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector<Process *> workload; - Process *process; -#endif // FULL_SYSTEM - - MemObject *mem; - - BaseCPU *checker; - - unsigned decodeToFetchDelay; +#endif }; #endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 78b0ee788..ad52b0d2e 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -70,18 +70,19 @@ class AlphaTC : public O3ThreadContext<Impl> { panic("Not supported on Alpha!"); } - // This function exits the thread context in the CPU and returns - // 1 if the CPU has no more active threads (meaning it's OK to exit); - // Used in syscall-emulation mode when a thread executes the 'exit' - // syscall. + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ virtual int exit() { - this->cpu->deallocateContext(this->thread->readTid()); + this->deallocate(); // If there are still threads executing in the system if (this->cpu->numActiveThreads()) - return 0; + return 0; // don't exit simulation else - return 1; + return 1; // exit simulation } }; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 60b555269..956b6ec3e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -162,10 +162,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -187,11 +183,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + bool drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -332,10 +331,6 @@ class DefaultCommit /** Vector of all of the threads. */ std::vector<Thread *> thread; - Fault fetchFault; - - int fetchTrapWait; - /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -383,8 +378,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. */ bool switchedOut; @@ -394,10 +389,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 06b8e8a95..c667d633a 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -80,10 +80,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameWidth(params->renameWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -123,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) tcSquash[i] = false; PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template <class Impl> @@ -235,7 +231,6 @@ DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr) cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template <class Impl> @@ -294,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template <class Impl> void -DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - -template <class Impl> -void DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) { iewStage = iew_stage; @@ -350,23 +338,38 @@ DefaultCommit<Impl>::initStage() } template <class Impl> -void -DefaultCommit<Impl>::switchOut() +bool +DefaultCommit<Impl>::drain() { - switchPending = true; + drainPending = true; + + // If it's already drained, return true. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + return true; + } + + return false; } template <class Impl> void -DefaultCommit<Impl>::doSwitchOut() +DefaultCommit<Impl>::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } template <class Impl> void +DefaultCommit<Impl>::resume() +{ + drainPending = false; +} + +template <class Impl> +void DefaultCommit<Impl>::takeOverFrom() { switchedOut = false; @@ -557,11 +560,15 @@ DefaultCommit<Impl>::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } + if ((*activeThreads).size() <= 0) + return; + list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -575,7 +582,7 @@ DefaultCommit<Impl>::tick() commitStatus[tid] = Running; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); rob->doSquash(tid); toIEW->commitInfo[tid].robSquashing = true; wroteToTimeBuffer = true; @@ -989,6 +996,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Check if the instruction caused a fault. If so, trap. Fault inst_fault = head_inst->getFault(); + // DTB will sometimes need the machine instruction for when + // faults happen. So we will set it here, prior to the DTB + // possibly needing it for its fault. + thread[tid]->setInst( + static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); + if (inst_fault != NoFault) { head_inst->setCompleted(); DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", @@ -1011,12 +1024,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // execution doesn't generate extra squashes. thread[tid]->inSyscall = true; - // DTB will sometimes need the machine instruction for when - // faults happen. So we will set it here, prior to the DTB - // possibly needing it for its fault. - thread[tid]->setInst( - static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); - // Execute the trap. Although it's slightly unrealistic in // terms of timing (as it doesn't wait for the full timing of // the trap event to complete before updating state), it's diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index feca4cdf2..c43cc2cf8 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -115,6 +115,36 @@ FullO3CPU<Impl>::ActivateThreadEvent::description() } template <class Impl> +FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); + cpu->removeThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::DeallocateContextEvent::description() +{ + return "FullO3CPU \"Deallocate Context\" event"; +} + +template <class Impl> FullO3CPU<Impl>::FullO3CPU(Params *params) : BaseO3CPU(params), tickEvent(this), @@ -141,15 +171,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters or derived - // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -158,7 +187,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { @@ -214,7 +243,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); @@ -361,6 +389,18 @@ FullO3CPU<Impl>::fullCPURegStats() } template <class Impl> +Port * +FullO3CPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return iew.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void FullO3CPU<Impl>::tick() { @@ -400,7 +440,8 @@ FullO3CPU<Impl>::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut) { + if (_status == SwitchedOut || + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -461,16 +502,107 @@ FullO3CPU<Impl>::init() template <class Impl> void +FullO3CPU<Impl>::activateThread(unsigned tid) +{ + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deactivateThread(unsigned tid) +{ + //Remove From Active List, if Active + list<unsigned>::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int tid, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleDeallocateContextEvent(tid, delay); + } else { + deactivateThread(tid); + removeThread(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int tid) +{ + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + deactivateThread(tid); + if (activeThreads.size() == 0) + unscheduleTickEvent(); + _status = Idle; +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int tid) +{ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + deallocateContext(tid, 1); +} + +template <class Impl> +void FullO3CPU<Impl>::insertThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. -#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -490,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -503,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template <class Impl> void FullO3CPU<Impl>::removeThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -520,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -528,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... - */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); + iew.squash(tid); + commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -604,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid) //blocks fetch contextSwitch = true; + //@todo: dont always add to waitlist //do waitlist cpuWaitList.push_back(tid); } @@ -611,149 +743,132 @@ FullO3CPU<Impl>::activateWhenReady(int tid) template <class Impl> void -FullO3CPU<Impl>::activateThread(unsigned int tid) -{ - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", - tid); - - activeThreads.push_back(tid); +FullO3CPU<Impl>::serialize(std::ostream &os) +{ + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); } } - template <class Impl> void -FullO3CPU<Impl>::activateContext(int tid, int delay) -{ - // Needs to set each stage to running as well. - if (delay){ - DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " - "on cycle %d\n", tid, curTick + cycles(delay)); - scheduleActivateThreadEvent(tid, delay); - } else { - activateThread(tid); - } - - if(lastActivatedCycle < curTick) { - scheduleTickEvent(delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); - - lastActivatedCycle = curTick; - - _status = Running; +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); } } template <class Impl> -void -FullO3CPU<Impl>::suspendContext(int tid) +unsigned int +FullO3CPU<Impl>::drain(Event *drain_event) { - DPRINTF(O3CPU,"[tid: %i]: Suspended ...\n", tid); - unscheduleTickEvent(); - _status = Idle; -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + drainCount = 0; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + commit.drain(); - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + // Wake the CPU and record activity so everything can drain out if + // the CPU was not able to immediately drain. + if (getState() != SimObject::Drained) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + + wakeCPU(); + activityRec.activity(); + + return 1; + } else { + return 0; } -*/ } template <class Impl> void -FullO3CPU<Impl>::deallocateContext(int tid) +FullO3CPU<Impl>::resume() { - DPRINTF(O3CPU,"[tid:%i]: Deallocating ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + assert(system->getMemoryMode() == System::Timing); + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Running); - removeThread(tid); - } -*/ + if (_status == SwitchedOut || _status == Idle) + return; + + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; } template <class Impl> void -FullO3CPU<Impl>::haltContext(int tid) +FullO3CPU<Impl>::signalDrained() { - DPRINTF(O3CPU,"[tid:%i]: Halted ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + if (++drainCount == NumStages) { + if (tickEvent.scheduled()) + tickEvent.squash(); - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Drained); - removeThread(tid); + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } -*/ + assert(drainCount <= 5); } template <class Impl> void -FullO3CPU<Impl>::switchOut(Sampler *_sampler) +FullO3CPU<Impl>::switchOut() { - sampler = _sampler; - switchCount = 0; fetch.switchOut(); - decode.switchOut(); rename.switchOut(); - iew.switchOut(); commit.switchOut(); + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); + } - // Wake the CPU and record activity so everything can drain out if - // the CPU is currently idle. - wakeCPU(); - activityRec.activity(); -} - -template <class Impl> -void -FullO3CPU<Impl>::signalSwitched() -{ - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } - + _status = SwitchedOut; #if USE_CHECKER - if (checker) - checker->switchOut(sampler); + if (checker) + checker->switchOut(); #endif - - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; - } - assert(switchCount <= 5); } template <class Impl> @@ -761,7 +876,7 @@ void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -932,7 +1047,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatReg(phys_reg, val); } @@ -941,7 +1057,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatReg(phys_reg, val, 64); } @@ -950,7 +1067,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegBits(phys_reg, val); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 1cff6142d..83cb966e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -57,6 +57,8 @@ class Checker; class ThreadContext; template <class> class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; @@ -197,6 +199,49 @@ class FullO3CPU : public BaseO3CPU /** The tick event used for scheduling CPU ticks. */ ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + class DeallocateContextEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + DeallocateContextEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule cpu to deallocate thread context.*/ + void scheduleDeallocateContextEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (deallocateContextEvent[tid].squashed()) + deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + else if (!deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule thread deallocation in CPU */ + void unscheduleDeallocateContextEvent(int tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -206,6 +251,9 @@ class FullO3CPU : public BaseO3CPU /** Registers statistics. */ void fullCPURegStats(); + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + /** Ticks CPU, calling tick() on each stage, and checking the overall * activity to see if the CPU should deschedule itself. */ @@ -219,7 +267,10 @@ class FullO3CPU : public BaseO3CPU { return activeThreads.size(); } /** Add Thread to Active Threads List */ - void activateThread(unsigned int tid); + void activateThread(unsigned tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(unsigned tid); /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -247,7 +298,7 @@ class FullO3CPU : public BaseO3CPU /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. */ - void deallocateContext(int tid); + void deallocateContext(int tid, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -263,6 +314,13 @@ class FullO3CPU : public BaseO3CPU /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + public: /** Executes a syscall on this cycle. * --------------------------------------- * Note: this is a virtual function. CPU-Specific @@ -270,14 +328,21 @@ class FullO3CPU : public BaseO3CPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual unsigned int drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -550,11 +615,11 @@ class FullO3CPU : public BaseO3CPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 1edf3335d..7f5ecbc26 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Drains the decode stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 16be01784..8b851c032 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) } template <class Impl> -void -DefaultDecode<Impl>::switchOut() +bool +DefaultDecode<Impl>::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. + cpu->signalDrained(); + return true; } template <class Impl> diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 000000000..a2cdf2dba --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "arch/isa_specific.hh" + +#if THE_ISA == ALPHA_ISA +template <class Impl> +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#endif + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 7fcd21b7d..931919af8 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -40,8 +40,6 @@ #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch @@ -164,6 +162,9 @@ class DefaultFetch /** Registers statistics. */ void regStats(); + /** Returns the icache port. */ + Port *getIcachePort() { return icachePort; } + /** Sets CPU pointer. */ void setCPU(O3CPU *cpu_ptr); @@ -182,11 +183,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -400,6 +404,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; @@ -423,6 +433,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 60eb76d17..4184e1867 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -109,6 +109,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) @@ -161,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -279,10 +282,6 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); - Port *mem_dport = mem->getPort(""); - icachePort->setPeer(mem_dport); - mem_dport->setPeer(icachePort); - #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); @@ -360,14 +359,19 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) return; } - // Wake up the CPU (if it went to sleep and was waiting on this completion - // event). - cpu->wakeCPU(); + memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - switchToActive(); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + } // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { @@ -383,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } template <class Impl> +bool +DefaultFetch<Impl>::drain() +{ + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; + return true; +} + +template <class Impl> void -DefaultFetch<Impl>::switchOut() +DefaultFetch<Impl>::resume() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + drainPending = false; } template <class Impl> void -DefaultFetch<Impl>::doSwitchOut() +DefaultFetch<Impl>::switchOut() { + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -498,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -509,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -540,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(cacheData[tid]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -898,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ////////////////////////////////////////// int tid = getFetchingThread(fetchPolicy); - if (tid == -1) { + if (tid == -1 || drainPending) { DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); // Breaks looping condition in tick() diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2af68d8fc..6c6be4787 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -31,11 +31,12 @@ #ifndef __CPU_O3_IEW_HH__ #define __CPU_O3_IEW_HH__ +#include "config/full_system.hh" + #include <queue> #include "base/statistics.hh" #include "base/timebuf.hh" -#include "config/full_system.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/lsq.hh" @@ -125,6 +126,9 @@ class DefaultIEW /** Initializes stage; sends back the number of free IQ and LSQ entries. */ void initStage(); + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + /** Sets CPU pointer for IEW, IQ, and LSQ. */ void setCPU(O3CPU *cpu_ptr); @@ -143,11 +147,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -204,6 +211,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#ifdef DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#ifdef DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#ifdef DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -384,11 +430,8 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; @@ -403,6 +446,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 8e6fd46a1..684ae2295 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -42,16 +42,17 @@ using namespace std; template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // @todo: Make this into a parameter. - issueToExecQueue(5, 5), + : issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -74,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -348,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr) } template <class Impl> +bool +DefaultIEW<Impl>::drain() +{ + // IEW is ready to drain at any time. + cpu->signalDrained(); + return true; +} + +template <class Impl> void -DefaultIEW<Impl>::switchOut() +DefaultIEW<Impl>::resume() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); } template <class Impl> void -DefaultIEW<Impl>::doSwitchOut() +DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; @@ -400,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -559,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -937,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1189,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1351,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index b99bd0900..36e0842be 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -687,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -784,6 +785,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 89791fec9..190734dc2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -65,6 +65,13 @@ class LSQ { /** Registers statistics of each LSQ unit. */ void regStats(); + /** Returns dcache port. + * @todo: Dcache port needs to be moved up to this level for SMT + * to work. For now it just returns the port from one of the + * threads. + */ + Port *getDcachePort() { return &dcachePort; } + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ @@ -251,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -267,7 +283,49 @@ class LSQ { template <class T> Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -296,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. */ + int retryTid; }; template <class Impl> diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 5173f8be1..4e3957029 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -36,9 +36,53 @@ using namespace std; template <class Impl> +Tick +LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template <class Impl> +bool +LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; +} + +template <class Impl> LSQ<Impl>::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); @@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -130,6 +175,8 @@ LSQ<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } @@ -502,6 +549,9 @@ LSQ<Impl>::hasStoresToWB() { list<unsigned>::iterator active_threads = (*activeThreads).begin(); + if ((*activeThreads).empty()) + return false; + while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; if (!hasStoresToWB(tid)) diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 74b8fe5bb..512b5a63c 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -64,6 +64,7 @@ class LSQUnit { typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,7 +72,7 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ @@ -87,6 +88,10 @@ class LSQUnit { void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. */ void switchOut(); @@ -206,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -216,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -239,54 +244,11 @@ class LSQUnit { /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; + /** Pointer to the LSQ. */ + LSQ *lsq; - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - O3CPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(O3CPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); - - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. */ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -639,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + iewStage->decrWb(load_inst->seqNum); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not @@ -653,7 +616,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -661,9 +624,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -673,8 +633,18 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 714acb2ef..4f5dbbf1c 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -31,6 +31,7 @@ #include "config/use_checker.hh" +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" #include "mem/packet.hh" @@ -77,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; @@ -95,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } template <class Impl> -Tick -LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template <class Impl> -bool -LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - -template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), @@ -144,13 +106,15 @@ LSQUnit<Impl>::LSQUnit() template<class Impl> void -LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, +LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). @@ -167,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -179,11 +141,6 @@ void LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); #if USE_CHECKER if (cpu->checker) { @@ -591,7 +548,7 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -833,6 +790,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); assert(!inst->isStore()); ++lsqIgnoredResponses; return; @@ -914,6 +872,7 @@ LSQUnit<Impl>::recvRetry() } else { // Still blocked! ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh index 69a1bb937..1c234bcd7 100755 --- a/src/cpu/o3/params.hh +++ b/src/cpu/o3/params.hh @@ -47,6 +47,18 @@ class O3Params : public BaseO3CPU::Params unsigned activity; // + // Pointers to key objects + // +#if !FULL_SYSTEM + std::vector<Process *> workload; + Process *process; +#endif // FULL_SYSTEM + + MemObject *mem; + + BaseCPU *checker; + + // // Caches // // MemInterface *icacheInterface; @@ -86,7 +98,10 @@ class O3Params : public BaseO3CPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -100,6 +115,12 @@ class O3Params : public BaseO3CPU::Params Tick fetchTrapLatency; // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + + // // Branch predictor (BP, BTB, RAS) // std::string predType; diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 6972f055f..b6677b4b1 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -86,10 +86,6 @@ class PhysRegFile //The duplication is unfortunate but it's better than having //different ways to access certain registers. - //Add these in later when everything else is in place -// void serialize(std::ostream &os); -// void unserialize(Checkpoint *cp, const std::string §ion); - /** Reads an integer register. */ uint64_t readIntReg(PhysRegIndex reg_idx) { diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 581fc8f81..034087feb 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df8b7f9da..805a72808 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard) } template <class Impl> -void -DefaultRename<Impl>::switchOut() +bool +DefaultRename<Impl>::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); + return true; } template <class Impl> void -DefaultRename<Impl>::doSwitchOut() +DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6f8080ef4..7cd5a5143 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -308,7 +308,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. */ bool doneSquashing[Impl::MaxThreads]; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index d9978b17f..1b9f666b8 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -41,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -352,11 +352,11 @@ void ROB<Impl>::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -371,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -408,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid) // Check if ROB is done squashing. - if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -520,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); @@ -544,6 +544,7 @@ ROB<Impl>::readHeadInst() } } */ + template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst(unsigned tid) @@ -558,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid) return dummyInst; } } + /* template <class Impl> uint64_t diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index d097ee63e..df8d1a6d8 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -112,7 +112,7 @@ class O3ThreadContext : public ThreadContext virtual void suspend(); /** Set the status to Unallocated. */ - virtual void deallocate(); + virtual void deallocate(int delay = 0); /** Set the status to Halted. */ virtual void halt(); diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index cfb71f623..bf8cbf850 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -115,7 +115,8 @@ template <class Impl> void O3ThreadContext<Impl>::activate(int delay) { - DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling activate on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Active) return; @@ -139,7 +140,8 @@ template <class Impl> void O3ThreadContext<Impl>::suspend() { - DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); + DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Suspended) return; @@ -163,22 +165,24 @@ O3ThreadContext<Impl>::suspend() template <class Impl> void -O3ThreadContext<Impl>::deallocate() +O3ThreadContext<Impl>::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); + cpu->deallocateContext(thread->readTid(), delay); } template <class Impl> void O3ThreadContext<Impl>::halt() { - DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); + DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Halted) return; diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index f726ac99b..e411c12bd 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -55,7 +55,6 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; -class Sampler; class RemoteGDB; class GDBListener; @@ -151,7 +150,7 @@ class OzoneCPU : public BaseCPU void suspend(); /// Set the status to Unallocated. - void deallocate(); + void deallocate(int delay = 0); /// Set the status to Halted. void halt(); @@ -356,12 +355,10 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(Sampler *sampler); + void switchOut(); void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); - Sampler *sampler; - int switchCount; #if FULL_SYSTEM @@ -375,6 +372,8 @@ class OzoneCPU : public BaseCPU PhysicalMemory *physmem; #endif + virtual Port *getPort(const std::string &name, int idx); + MemObject *mem; FrontEnd *frontEnd; @@ -386,7 +385,7 @@ class OzoneCPU : public BaseCPU virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); - virtual void deallocateContext(int thread_num); + virtual void deallocateContext(int thread_num, int delay); virtual void haltContext(int thread_num); // statistics diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 2cdc8a3da..f58b81990 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -244,9 +244,8 @@ OzoneCPU<Impl>::~OzoneCPU() template <class Impl> void -OzoneCPU<Impl>::switchOut(Sampler *_sampler) +OzoneCPU<Impl>::switchOut() { - sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. backEnd->switchOut(); @@ -262,13 +261,12 @@ OzoneCPU<Impl>::signalSwitched() frontEnd->doSwitchOut(); #if USE_CHECKER if (checker) - checker->switchOut(sampler); + checker->switchOut(); #endif _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); } assert(switchCount <= 2); } @@ -337,7 +335,7 @@ OzoneCPU<Impl>::suspendContext(int thread_num) template <class Impl> void -OzoneCPU<Impl>::deallocateContext(int thread_num) +OzoneCPU<Impl>::deallocateContext(int thread_num, int delay) { // for now, these are equivalent suspendContext(thread_num); @@ -421,6 +419,18 @@ OzoneCPU<Impl>::init() } template <class Impl> +Port * +OzoneCPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return backEnd->getDcachePort(); + else if (if_name == "icache_port") + return frontEnd->getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void OzoneCPU<Impl>::serialize(std::ostream &os) { @@ -782,9 +792,9 @@ OzoneCPU<Impl>::OzoneTC::suspend() /// Set the status to Unallocated. template <class Impl> void -OzoneCPU<Impl>::OzoneTC::deallocate() +OzoneCPU<Impl>::OzoneTC::deallocate(int delay) { - cpu->deallocateContext(thread->readTid()); + cpu->deallocateContext(thread->readTid(), delay); } /// Set the status to Halted. diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index 181609098..3ed3c4d18 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -119,6 +119,8 @@ class FrontEnd void regStats(); + Port *getIcachePort() { return &icachePort; } + void tick(); Fault fetchCacheLine(); void processInst(DynInstPtr &inst); diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 40042489d..9da937320 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -36,6 +36,7 @@ #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/front_end.hh" +#include "mem/mem_object.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -138,10 +139,6 @@ FrontEnd<Impl>::setCPU(CPUType *cpu_ptr) icachePort.setName(this->name() + "-iport"); - Port *mem_dport = mem->getPort(""); - icachePort.setPeer(mem_dport); - mem_dport->setPeer(&icachePort); - #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(&icachePort); diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh index bb3ef3a72..d836ceebd 100644 --- a/src/cpu/ozone/lw_back_end.hh +++ b/src/cpu/ozone/lw_back_end.hh @@ -51,6 +51,8 @@ class ThreadContext; template <class Impl> class OzoneThreadState; +class Port; + template <class Impl> class LWBackEnd { @@ -114,6 +116,8 @@ class LWBackEnd void setCommBuffer(TimeBuffer<CommStruct> *_comm); + Port *getDcachePort() { return LSQ.getDcachePort(); } + void tick(); void squash(); void generateTCEvent() { tcSquash = true; } diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index a73d3ee6e..a4f1d805e 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -142,7 +142,7 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst) template <class Impl> LWBackEnd<Impl>::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), tcSquash(false), LSQ(params), + trapSquash(false), tcSquash(false), width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; @@ -557,6 +557,7 @@ LWBackEnd<Impl>::checkInterrupts() } } } +#endif template <class Impl> void @@ -580,7 +581,6 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency) // Generate trap squash event. generateTrapEvent(latency); } -#endif template <class Impl> void @@ -602,6 +602,7 @@ LWBackEnd<Impl>::tick() #if FULL_SYSTEM checkInterrupts(); +#endif if (trapSquash) { assert(!tcSquash); @@ -609,7 +610,6 @@ LWBackEnd<Impl>::tick() } else if (tcSquash) { squashFromTC(); } -#endif if (dispatchStatus != Blocked) { dispatchInsts(); @@ -1137,13 +1137,9 @@ LWBackEnd<Impl>::commitInst(int inst_num) thread->setInst( static_cast<TheISA::MachInst>(inst->staticInst->machInst)); -#if FULL_SYSTEM + handleFault(inst_fault); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM } int freed_regs = 0; diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index c749e3aee..2eb09d01a 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -91,8 +91,7 @@ class OzoneLWLSQ { void setBE(BackEnd *be_ptr) { be = be_ptr; } - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); + Port *getDcachePort() { return &dcachePort; } /** Ticks the LSQ unit, which in this case only resets the number of * used cache ports. @@ -241,13 +240,11 @@ class OzoneLWLSQ { class DcachePort : public Port { protected: - OzoneCPU *cpu; - OzoneLWLSQ *lsq; public: - DcachePort(OzoneCPU *_cpu, OzoneLWLSQ *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) + DcachePort(OzoneLWLSQ *_lsq) + : lsq(_lsq) { } protected: @@ -266,11 +263,8 @@ class OzoneLWLSQ { virtual void recvRetry(); }; - /** Pointer to the D-cache. */ - DcachePort *dcachePort; - - /** Pointer to the page table. */ -// PageTable *pTable; + /** D-cache port. */ + DcachePort dcachePort; public: struct SQEntry { @@ -639,7 +633,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) data_pkt->senderState = state; // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + if (!dcachePort.sendTiming(data_pkt)) { // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) return NoFault; diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index a65a2a4d3..88e9c218f 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -131,8 +131,9 @@ OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt) template <class Impl> OzoneLWLSQ<Impl>::OzoneLWLSQ() - : switchedOut(false), loads(0), stores(0), storesToWB(0), stalled(false), - isStoreBlocked(false), isLoadBlocked(false), loadBlockedHandled(false) + : switchedOut(false), dcachePort(this), loads(0), stores(0), + storesToWB(0), stalled(false), isStoreBlocked(false), + isLoadBlocked(false), loadBlockedHandled(false) { } @@ -175,15 +176,11 @@ void OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); + dcachePort.setName(this->name() + "-dport"); #if USE_CHECKER if (cpu->checker) { - cpu->checker->setDcachePort(dcachePort); + cpu->checker->setDcachePort(&dcachePort); } #endif } @@ -614,7 +611,7 @@ OzoneLWLSQ<Impl>::writebackStores() state->noWB = true; } - if (!dcachePort->sendTiming(data_pkt)) { + if (!dcachePort.sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. isStoreBlocked = true; assert(retryPkt == NULL); diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index b7202cbbb..c396f5033 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -33,6 +33,7 @@ #include "cpu/simple/atomic.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -55,18 +56,28 @@ AtomicSimpleCPU::TickEvent::description() return "AtomicSimpleCPU tick event"; } +Port * +AtomicSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void AtomicSimpleCPU::init() { //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); +// Port *mem_dport = mem->getPort(""); +// dcachePort.setPeer(mem_dport); +// mem_dport->setPeer(&dcachePort); - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); +// Port *mem_iport = mem->getPort(""); +// icachePort.setPeer(mem_iport); +// mem_iport->setPeer(&icachePort); BaseCPU::init(); #if FULL_SYSTEM @@ -148,18 +159,31 @@ AtomicSimpleCPU::~AtomicSimpleCPU() void AtomicSimpleCPU::serialize(ostream &os) { - SERIALIZE_ENUM(_status); - BaseSimpleCPU::serialize(os); + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); + BaseSimpleCPU::serialize(os); } void AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - UNSERIALIZE_ENUM(_status); - BaseSimpleCPU::unserialize(cp, section); + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + BaseSimpleCPU::unserialize(cp, section); +} + +void +AtomicSimpleCPU::resume() +{ + assert(system->getMemoryMode() == System::Atomic); + changeState(SimObject::Running); + if (thread->status() == ThreadContext::Active) { + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + } } void @@ -441,11 +465,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -473,11 +497,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -510,11 +534,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->width = width; params->simulate_stalls = simulate_stalls; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 951a8da06..b602af558 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -122,8 +122,11 @@ class AtomicSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + virtual void resume(); void switchOut(); void takeOverFrom(BaseCPU *oldCPU); diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index db5dd2acf..240696c2b 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -41,7 +41,6 @@ #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/simple/base.hh" #include "cpu/simple_thread.hh" #include "cpu/smt.hh" @@ -56,10 +55,10 @@ #include "sim/sim_events.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "sim/system.hh" #if FULL_SYSTEM #include "base/remote_gdb.hh" -#include "sim/system.hh" #include "arch/tlb.hh" #include "arch/stacktrace.hh" #include "arch/vtophys.hh" @@ -179,8 +178,8 @@ void BaseSimpleCPU::serialize(ostream &os) { BaseCPU::serialize(os); - SERIALIZE_SCALAR(inst); - nameOut(os, csprintf("%s.xc", name())); +// SERIALIZE_SCALAR(inst); + nameOut(os, csprintf("%s.xc.0", name())); thread->serialize(os); } @@ -188,8 +187,8 @@ void BaseSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { BaseCPU::unserialize(cp, section); - UNSERIALIZE_SCALAR(inst); - thread->unserialize(cp, csprintf("%s.xc", section)); +// UNSERIALIZE_SCALAR(inst); + thread->unserialize(cp, csprintf("%s.xc.0", section)); } void diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39bc86050..57cfa3c2c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -38,7 +38,6 @@ #include "cpu/base.hh" #include "cpu/simple_thread.hh" #include "cpu/pc_event.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "mem/port.hh" @@ -128,11 +127,6 @@ class BaseSimpleCPU : public BaseCPU // Static data storage TheISA::IntReg dataReg; - // Pointer to the sampler that is telling us to switchover. - // Used to signal the completion of the pipe drain and schedule - // the next switchover - Sampler *sampler; - StaticInstPtr curStaticInst; void checkForInterrupts(); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index d5bdcfa9b..246bcec05 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -33,23 +33,25 @@ #include "cpu/simple/timing.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; +Port * +TimingSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void TimingSimpleCPU::init() { - //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); - - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); - BaseCPU::init(); #if FULL_SYSTEM for (int i = 0; i < threadContexts.size(); ++i) { @@ -88,8 +90,9 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; - quiesceEvent = NULL; - state = SimObject::Timing; + drainEvent = NULL; + fetchEvent = NULL; + changeState(SimObject::Running); } @@ -100,30 +103,31 @@ TimingSimpleCPU::~TimingSimpleCPU() void TimingSimpleCPU::serialize(ostream &os) { - SERIALIZE_ENUM(_status); + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); BaseSimpleCPU::serialize(os); } void TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - UNSERIALIZE_ENUM(_status); + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); BaseSimpleCPU::unserialize(cp, section); } -bool -TimingSimpleCPU::quiesce(Event *quiesce_event) +unsigned int +TimingSimpleCPU::drain(Event *drain_event) { - // TimingSimpleCPU is ready to quiesce if it's not waiting for + // TimingSimpleCPU is ready to drain if it's not waiting for // an access to complete. if (status() == Idle || status() == Running || status() == SwitchedOut) { - DPRINTF(Config, "Ready to quiesce\n"); - return false; + changeState(SimObject::Drained); + return 0; } else { - DPRINTF(Config, "Waiting to quiesce\n"); - changeState(SimObject::Quiescing); - quiesceEvent = quiesce_event; - return true; + changeState(SimObject::Draining); + drainEvent = drain_event; + return 1; } } @@ -131,16 +135,21 @@ void TimingSimpleCPU::resume() { if (_status != SwitchedOut && _status != Idle) { - Event *e = - new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true); - e->schedule(curTick); + // Delete the old event if it existed. + if (fetchEvent) { + if (fetchEvent->scheduled()) + fetchEvent->deschedule(); + + delete fetchEvent; + } + + fetchEvent = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); + fetchEvent->schedule(curTick); } -} -void -TimingSimpleCPU::setMemoryMode(State new_mode) -{ - assert(new_mode == SimObject::Timing); + assert(system->getMemoryMode() == System::Timing); + changeState(SimObject::Running); } void @@ -148,6 +157,11 @@ TimingSimpleCPU::switchOut() { assert(status() == Running || status() == Idle); _status = SwitchedOut; + + // If we've been scheduled to resume but are then told to switch out, + // we'll need to cancel it. + if (fetchEvent && fetchEvent->scheduled()) + fetchEvent->deschedule(); } @@ -179,9 +193,9 @@ TimingSimpleCPU::activateContext(int thread_num, int delay) notIdleFraction++; _status = Running; // kick things off by initiating the fetch of the next instruction - Event *e = - new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true); - e->schedule(curTick + cycles(delay)); + fetchEvent = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); + fetchEvent->schedule(curTick + cycles(delay)); } @@ -422,8 +436,8 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) delete pkt->req; delete pkt; - if (getState() == SimObject::Quiescing) { - completeQuiesce(); + if (getState() == SimObject::Draining) { + completeDrain(); return; } @@ -479,8 +493,8 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; - if (getState() == SimObject::Quiescing) { - completeQuiesce(); + if (getState() == SimObject::Draining) { + completeDrain(); delete pkt->req; delete pkt; @@ -499,11 +513,11 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) void -TimingSimpleCPU::completeQuiesce() +TimingSimpleCPU::completeDrain() { - DPRINTF(Config, "Done quiescing\n"); - changeState(SimObject::QuiescedTiming); - quiesceEvent->process(); + DPRINTF(Config, "Done draining\n"); + changeState(SimObject::Drained); + drainEvent->process(); } bool @@ -539,11 +553,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -571,11 +585,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -606,11 +620,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index d91144e4a..ac36e5c99 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -64,7 +64,9 @@ class TimingSimpleCPU : public BaseSimpleCPU Status status() const { return _status; } - Event *quiesceEvent; + Event *drainEvent; + + Event *fetchEvent; private: @@ -130,12 +132,13 @@ class TimingSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - virtual bool quiesce(Event *quiesce_event); + virtual unsigned int drain(Event *drain_event); virtual void resume(); - virtual void setMemoryMode(State new_mode); void switchOut(); void takeOverFrom(BaseCPU *oldCPU); @@ -154,7 +157,7 @@ class TimingSimpleCPU : public BaseSimpleCPU void completeDataAccess(Packet *); void advanceInst(Fault fault); private: - void completeQuiesce(); + void completeDrain(); }; #endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index db28b23e8..5f86cf2b7 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -123,15 +123,19 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, tc = new ProxyThreadContext<SimpleThread>(this); } -SimpleThread::SimpleThread(RegFile *regFile) - : ThreadState(-1, -1, NULL, -1, NULL), cpu(NULL) +#endif + +SimpleThread::SimpleThread() +#if FULL_SYSTEM + : ThreadState(-1, -1) +#else + : ThreadState(-1, -1, NULL, -1, NULL) +#endif { - regs = *regFile; tc = new ProxyThreadContext<SimpleThread>(this); + regs.clear(); } -#endif - SimpleThread::~SimpleThread() { delete tc; @@ -147,13 +151,8 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) assert(process == oldContext->getProcessPtr()); #endif - // copy over functional state - _status = oldContext->status(); - copyArchRegs(oldContext); - cpuId = oldContext->readCpuId(); -#if !FULL_SYSTEM - funcExeInst = oldContext->readFuncExeInst(); -#else + copyState(oldContext); +#if FULL_SYSTEM EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); if (quiesce) { // Point the quiesce event's TC at this TC so that it wakes up @@ -171,42 +170,50 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) } void -SimpleThread::serialize(ostream &os) +SimpleThread::copyTC(ThreadContext *context) { - SERIALIZE_ENUM(_status); - regs.serialize(os); - // thread_num and cpu_id are deterministic from the config - SERIALIZE_SCALAR(funcExeInst); - SERIALIZE_SCALAR(inst); + copyState(context); #if FULL_SYSTEM - Tick quiesceEndTick = 0; - if (quiesceEvent->scheduled()) - quiesceEndTick = quiesceEvent->when(); - SERIALIZE_SCALAR(quiesceEndTick); - if (kernelStats) - kernelStats->serialize(os); + EndQuiesceEvent *quiesce = context->getQuiesceEvent(); + if (quiesce) { + quiesceEvent = quiesce; + } + Kernel::Statistics *stats = context->getKernelStats(); + if (stats) { + kernelStats = stats; + } #endif } +void +SimpleThread::copyState(ThreadContext *oldContext) +{ + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + cpuId = oldContext->readCpuId(); +#if !FULL_SYSTEM + funcExeInst = oldContext->readFuncExeInst(); +#endif + inst = oldContext->getInst(); +} + +void +SimpleThread::serialize(ostream &os) +{ + ThreadState::serialize(os); + regs.serialize(os); + // thread_num and cpu_id are deterministic from the config +} + void SimpleThread::unserialize(Checkpoint *cp, const std::string §ion) { - UNSERIALIZE_ENUM(_status); + ThreadState::unserialize(cp, section); regs.unserialize(cp, section); // thread_num and cpu_id are deterministic from the config - UNSERIALIZE_SCALAR(funcExeInst); - UNSERIALIZE_SCALAR(inst); - -#if FULL_SYSTEM - Tick quiesceEndTick; - UNSERIALIZE_SCALAR(quiesceEndTick); - if (quiesceEndTick) - quiesceEvent->schedule(quiesceEndTick); - if (kernelStats) - kernelStats->unserialize(cp, section); -#endif } #if FULL_SYSTEM diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index de65e9891..d36853db4 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -119,16 +119,20 @@ class SimpleThread : public ThreadState #else SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *memobj); - // Constructor to use SimpleThread to pass reg file around. Not - // used for anything else. - SimpleThread(RegFile *regFile); #endif + + SimpleThread(); + virtual ~SimpleThread(); virtual void takeOverFrom(ThreadContext *oldContext); void regStats(const std::string &name); + void copyTC(ThreadContext *context); + + void copyState(ThreadContext *oldContext); + void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string §ion); diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index a98078634..ea1a65148 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -39,6 +39,7 @@ #include "base/misc.hh" #include "base/refcnt.hh" #include "cpu/op_class.hh" +#include "cpu/o3/dyn_inst.hh" #include "sim/host.hh" #include "arch/isa_traits.hh" @@ -51,9 +52,6 @@ class DynInst; class Packet; template <class Impl> -class AlphaDynInst; - -template <class Impl> class OzoneDynInst; class CheckerCPU; diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 70d705144..e019e22bc 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -143,7 +143,7 @@ class ThreadContext virtual void suspend() = 0; /// Set the status to Unallocated. - virtual void deallocate() = 0; + virtual void deallocate(int delay = 0) = 0; /// Set the status to Halted. virtual void halt() = 0; @@ -318,7 +318,7 @@ class ProxyThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(); } /// Set the status to Halted. void halt() { actualTC->halt(); } diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc index 872678a41..6a96560f1 100644 --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -31,6 +31,12 @@ #include "base/output.hh" #include "cpu/profile.hh" #include "cpu/thread_state.hh" +#include "sim/serialize.hh" + +#if FULL_SYSTEM +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#endif #if FULL_SYSTEM ThreadState::ThreadState(int _cpuId, int _tid) @@ -49,6 +55,43 @@ ThreadState::ThreadState(int _cpuId, int _tid, Process *_process, numLoad = 0; } +void +ThreadState::serialize(std::ostream &os) +{ + SERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(funcExeInst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; + if (quiesceEvent->scheduled()) + quiesceEndTick = quiesceEvent->when(); + SERIALIZE_SCALAR(quiesceEndTick); + if (kernelStats) + kernelStats->serialize(os); +#endif +} + +void +ThreadState::unserialize(Checkpoint *cp, const std::string §ion) +{ + + UNSERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(funcExeInst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) + quiesceEvent->schedule(quiesceEndTick); + if (kernelStats) + kernelStats->unserialize(cp, section); +#endif +} + #if FULL_SYSTEM void diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index cb1449ac5..b03a2e2bb 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -49,6 +49,8 @@ namespace Kernel { }; #endif +class Checkpoint; + /** * Struct for holding general thread state that is needed across CPU * models. This includes things such as pointers to the process, @@ -65,6 +67,10 @@ struct ThreadState { short _asid, MemObject *mem); #endif + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string §ion); + void setCpuId(int id) { cpuId = id; } int readCpuId() { return cpuId; } diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index 63435e87c..5ffc02d34 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -227,177 +227,143 @@ IdeController::setDmaComplete(IdeDisk *disk) // Read and write handling //// -void -IdeController::readConfig(int offset, uint8_t *data) +Tick +IdeController::readConfig(Packet *pkt) { - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 1) <= IDE_CTRL_CONF_END) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; + if (offset < PCI_DEVICE_SPECIFIC) + return PciDev::readConfig(pkt); + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); + pkt->allocate(); + + switch (pkt->getSize()) { + case sizeof(uint8_t): switch (offset) { case IDE_CTRL_CONF_DEV_TIMING: - *data = config_regs.sidetim; + pkt->set<uint8_t>(config_regs.sidetim); break; case IDE_CTRL_CONF_UDMA_CNTRL: - *data = config_regs.udmactl; + pkt->set<uint8_t>(config_regs.udmactl); break; case IDE_CTRL_CONF_PRIM_TIMING+1: - *data = htole(config_regs.idetim0) >> 8; + pkt->set<uint8_t>(htole(config_regs.idetim0) >> 8); break; case IDE_CTRL_CONF_SEC_TIMING+1: - *data = htole(config_regs.idetim1) >> 8; + pkt->set<uint8_t>(htole(config_regs.idetim1) >> 8); break; case IDE_CTRL_CONF_IDE_CONFIG: - *data = htole(config_regs.ideconfig) & 0xFF; + pkt->set<uint8_t>(htole(config_regs.ideconfig) & 0xFF); break; case IDE_CTRL_CONF_IDE_CONFIG+1: - *data = htole(config_regs.ideconfig) >> 8; + pkt->set<uint8_t>(htole(config_regs.ideconfig) >> 8); break; default: panic("Invalid PCI configuration read for size 1 at offset: %#x!\n", offset); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", - offset, (uint32_t)*data); -} - -void -IdeController::readConfig(int offset, uint16_t *data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 2) <= IDE_CTRL_CONF_END) { - + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", offset, + (uint32_t)pkt->get<uint8_t>()); + break; + case sizeof(uint16_t): switch (offset) { case IDE_CTRL_CONF_PRIM_TIMING: - *data = config_regs.idetim0; + pkt->set<uint16_t>(config_regs.idetim0); break; case IDE_CTRL_CONF_SEC_TIMING: - *data = config_regs.idetim1; + pkt->set<uint16_t>(config_regs.idetim1); break; case IDE_CTRL_CONF_UDMA_TIMING: - *data = config_regs.udmatim; + pkt->set<uint16_t>(config_regs.udmatim); break; case IDE_CTRL_CONF_IDE_CONFIG: - *data = config_regs.ideconfig; + pkt->set<uint16_t>(config_regs.ideconfig); break; default: panic("Invalid PCI configuration read for size 2 offset: %#x!\n", offset); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, + (uint32_t)pkt->get<uint16_t>()); + break; + case sizeof(uint32_t): + panic("No 32bit reads implemented for this device."); + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, + (uint32_t)pkt->get<uint32_t>()); + break; + default: + panic("invalid access size(?) for PCI configspace!\n"); } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, *data); -} + pkt->result = Packet::Success; + return configDelay; -void -IdeController::readConfig(int offset, uint32_t *data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, *data); } -void -IdeController::writeConfig(int offset, const uint8_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 1) <= IDE_CTRL_CONF_END) { - switch (offset) { - case IDE_CTRL_CONF_DEV_TIMING: - config_regs.sidetim = data; - break; - case IDE_CTRL_CONF_UDMA_CNTRL: - config_regs.udmactl = data; - break; - case IDE_CTRL_CONF_IDE_CONFIG: - config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | (data); - break; - case IDE_CTRL_CONF_IDE_CONFIG+1: - config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | data << 8; - break; - default: - panic("Invalid PCI configuration write for size 1 offset: %#x!\n", - offset); - } - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n", - offset, (uint32_t)data); -} - -void -IdeController::writeConfig(int offset, const uint16_t data) +Tick +IdeController::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 2) <= IDE_CTRL_CONF_END) { + PciDev::writeConfig(pkt); + } else { + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); - switch (offset) { - case IDE_CTRL_CONF_PRIM_TIMING: - config_regs.idetim0 = data; - break; - case IDE_CTRL_CONF_SEC_TIMING: - config_regs.idetim1 = data; + switch (pkt->getSize()) { + case sizeof(uint8_t): + switch (offset) { + case IDE_CTRL_CONF_DEV_TIMING: + config_regs.sidetim = pkt->get<uint8_t>(); + break; + case IDE_CTRL_CONF_UDMA_CNTRL: + config_regs.udmactl = pkt->get<uint8_t>(); + break; + case IDE_CTRL_CONF_IDE_CONFIG: + config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | + (pkt->get<uint8_t>()); + break; + case IDE_CTRL_CONF_IDE_CONFIG+1: + config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | + pkt->get<uint8_t>() << 8; + break; + default: + panic("Invalid PCI configuration write for size 1 offset: %#x!\n", + offset); + } + DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n", + offset, (uint32_t)pkt->get<uint8_t>()); break; - case IDE_CTRL_CONF_UDMA_TIMING: - config_regs.udmatim = data; + case sizeof(uint16_t): + switch (offset) { + case IDE_CTRL_CONF_PRIM_TIMING: + config_regs.idetim0 = pkt->get<uint16_t>(); + break; + case IDE_CTRL_CONF_SEC_TIMING: + config_regs.idetim1 = pkt->get<uint16_t>(); + break; + case IDE_CTRL_CONF_UDMA_TIMING: + config_regs.udmatim = pkt->get<uint16_t>(); + break; + case IDE_CTRL_CONF_IDE_CONFIG: + config_regs.ideconfig = pkt->get<uint16_t>(); + break; + default: + panic("Invalid PCI configuration write for size 2 offset: %#x!\n", + offset); + } + DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", + offset, (uint32_t)pkt->get<uint16_t>()); break; - case IDE_CTRL_CONF_IDE_CONFIG: - config_regs.ideconfig = data; + case sizeof(uint32_t): + panic("Write of unimplemented PCI config. register: %x\n", offset); break; default: - panic("Invalid PCI configuration write for size 2 offset: %#x!\n", - offset); + panic("invalid access size(?) for PCI configspace!\n"); } - - } else { - panic("Write of unimplemented PCI config. register: %x\n", offset); } - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", offset, data); - - /* Trap command register writes and enable IO/BM as appropriate. */ - if (offset == PCI_COMMAND) { - if (letoh(config.command) & PCI_CMD_IOSE) - io_enabled = true; - else - io_enabled = false; - - if (letoh(config.command) & PCI_CMD_BME) - bm_enabled = true; - else - bm_enabled = false; - } - -} - -void -IdeController::writeConfig(int offset, const uint32_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 4 data: %#x\n", offset, data); + /* Trap command register writes and enable IO/BM as appropriate as well as + * BARs. */ switch(offset) { case PCI0_BASE_ADDR0: if (BARAddrs[0] != 0) @@ -423,9 +389,24 @@ IdeController::writeConfig(int offset, const uint32_t data) if (BARAddrs[4] != 0) bmi_addr = BARAddrs[4]; break; + + case PCI_COMMAND: + if (letoh(config.command) & PCI_CMD_IOSE) + io_enabled = true; + else + io_enabled = false; + + if (letoh(config.command) & PCI_CMD_BME) + bm_enabled = true; + else + bm_enabled = false; + break; } + pkt->result = Packet::Success; + return configDelay; } + Tick IdeController::read(Packet *pkt) { @@ -770,7 +751,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(IdeController) SimObjectParam<System *> system; SimObjectParam<Platform *> platform; - SimObjectParam<PciConfigAll *> configspace; SimObjectParam<PciConfigData *> configdata; Param<uint32_t> pci_bus; Param<uint32_t> pci_dev; @@ -784,7 +764,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(IdeController) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -800,7 +779,6 @@ CREATE_SIM_OBJECT(IdeController) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/ide_ctrl.hh b/src/dev/ide_ctrl.hh index 1d30c8b31..5842d322e 100644 --- a/src/dev/ide_ctrl.hh +++ b/src/dev/ide_ctrl.hh @@ -204,12 +204,8 @@ class IdeController : public PciDev IdeController(Params *p); ~IdeController(); - virtual void writeConfig(int offset, const uint8_t data); - virtual void writeConfig(int offset, const uint16_t data); - virtual void writeConfig(int offset, const uint32_t data); - virtual void readConfig(int offset, uint8_t *data); - virtual void readConfig(int offset, uint16_t *data); - virtual void readConfig(int offset, uint32_t *data); + virtual Tick writeConfig(Packet *pkt); + virtual Tick readConfig(Packet *pkt); void setDmaComplete(IdeDisk *disk); diff --git a/src/dev/ide_disk.cc b/src/dev/ide_disk.cc index dc78021f8..12564ddd0 100644 --- a/src/dev/ide_disk.cc +++ b/src/dev/ide_disk.cc @@ -318,7 +318,7 @@ IdeDisk::doDmaTransfer() panic("Inconsistent DMA transfer state: dmaState = %d devState = %d\n", dmaState, devState); - if (ctrl->dmaPending()) { + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaTransferEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else @@ -398,8 +398,7 @@ IdeDisk::doDmaRead() curPrd.getByteCount(), TheISA::PageBytes); } - if (ctrl->dmaPending()) { - panic("shouldn't be reentant??"); + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaReadWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else if (!dmaReadCG->done()) { @@ -474,8 +473,7 @@ IdeDisk::doDmaWrite() dmaWriteCG = new ChunkGenerator(curPrd.getBaseAddr(), curPrd.getByteCount(), TheISA::PageBytes); } - if (ctrl->dmaPending()) { - panic("shouldn't be reentant??"); + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaWriteWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else if (!dmaWriteCG->done()) { diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index e769ef037..660efabfd 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -32,10 +32,12 @@ #include "base/trace.hh" #include "dev/io_device.hh" #include "sim/builder.hh" +#include "sim/system.hh" -PioPort::PioPort(PioDevice *dev, Platform *p) - : Port(dev->name() + "-pioport"), device(dev), platform(p) +PioPort::PioPort(PioDevice *dev, System *s, std::string pname) + : Port(dev->name() + pname), device(dev), sys(s), + outTiming(0), drainEvent(NULL) { } @@ -68,30 +70,44 @@ PioPort::recvRetry() if (result) transmitList.pop_front(); } + if (transmitList.size() == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } void PioPort::SendEvent::process() { + port->outTiming--; + assert(port->outTiming >= 0); if (port->Port::sendTiming(packet)) - return; + if (port->transmitList.size() == 0 && port->drainEvent) { + port->drainEvent->process(); + port->drainEvent = NULL; + } + return; port->transmitList.push_back(packet); } +void +PioPort::resendNacked(Packet *pkt) { + pkt->reinitNacked(); + if (transmitList.size()) { + transmitList.push_front(pkt); + } else { + if (!Port::sendTiming(pkt)) + transmitList.push_front(pkt); + } +}; bool PioPort::recvTiming(Packet *pkt) { if (pkt->result == Packet::Nacked) { - pkt->reinitNacked(); - if (transmitList.size()) { - transmitList.push_front(pkt); - } else { - if (!Port::sendTiming(pkt)) - transmitList.push_front(pkt); - } + resendNacked(pkt); } else { Tick latency = device->recvAtomic(pkt); // turn packet around to go back to requester @@ -101,6 +117,15 @@ PioPort::recvTiming(Packet *pkt) return true; } +unsigned int +PioPort::drain(Event *de) +{ + if (outTiming == 0 && transmitList.size() == 0) + return 0; + drainEvent = de; + return 1; +} + PioDevice::~PioDevice() { if (pioPort) @@ -115,6 +140,19 @@ PioDevice::init() pioPort->sendStatusChange(Port::RangeChange); } + +unsigned int +PioDevice::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + void BasicPioDevice::addressRanges(AddrRangeList &range_list) { @@ -124,8 +162,9 @@ BasicPioDevice::addressRanges(AddrRangeList &range_list) } -DmaPort::DmaPort(DmaDevice *dev, Platform *p) - : Port(dev->name() + "-dmaport"), device(dev), platform(p), pendingCount(0) +DmaPort::DmaPort(DmaDevice *dev, System *s) + : Port(dev->name() + "-dmaport"), device(dev), sys(s), pendingCount(0), + actionInProgress(0), drainEvent(NULL) { } bool @@ -155,6 +194,11 @@ DmaPort::recvTiming(Packet *pkt) } delete pkt->req; delete pkt; + + if (pendingCount == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } else { panic("Got packet without sender state... huh?\n"); } @@ -166,6 +210,29 @@ DmaDevice::DmaDevice(Params *p) : PioDevice(p), dmaPort(NULL) { } + +unsigned int +DmaDevice::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de) + dmaPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + +unsigned int +DmaPort::drain(Event *de) +{ + if (pendingCount == 0) + return 0; + drainEvent = de; + return 1; +} + + void DmaPort::recvRetry() { @@ -191,6 +258,8 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, { assert(event); + assert(device->getState() == SimObject::Running); + DmaReqState *reqState = new DmaReqState(event, this, size); for (ChunkGenerator gen(addr, size, peerBlockSize()); @@ -208,51 +277,54 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, pendingCount++; sendDma(pkt); } + } void DmaPort::sendDma(Packet *pkt, bool front) { - // some kind of selction between access methods - // more work is going to have to be done to make - // switching actually work - /* MemState state = device->platform->system->memState; - - if (state == Timing) { */ - DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", - pkt, pkt->getAddr()); - if (transmitList.size() || !sendTiming(pkt)) { - if (front) - transmitList.push_front(pkt); - else - transmitList.push_back(pkt); - DPRINTF(DMA, "-- Failed: queued\n"); - } else { - DPRINTF(DMA, "-- Done\n"); - } - /* } else if (state == Atomic) { - sendAtomic(pkt); - if (pkt->senderState) { - DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState); - assert(state); - state->completionEvent->schedule(curTick + (pkt->time - - pkt->req->getTime()) +1); - delete state; - } - pendingCount--; - assert(pendingCount >= 0); - delete pkt->req; - delete pkt; - - } else if (state == Functional) { - sendFunctional(pkt); - // Is this correct??? - completionEvent->schedule(pkt->req->responseTime - pkt->req->requestTime); - completionEvent == NULL; + // some kind of selction between access methods + // more work is going to have to be done to make + // switching actually work + + System::MemoryMode state = sys->getMemoryMode(); + if (state == System::Timing) { + DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", + pkt, pkt->getAddr()); + if (transmitList.size() || !sendTiming(pkt)) { + if (front) + transmitList.push_front(pkt); + else + transmitList.push_back(pkt); + DPRINTF(DMA, "-- Failed: queued\n"); + } else { + DPRINTF(DMA, "-- Done\n"); + } + } else if (state == System::Atomic) { + Tick lat; + lat = sendAtomic(pkt); + assert(pkt->senderState); + DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState); + assert(state); + + state->numBytes += pkt->req->getSize(); + if (state->totBytes == state->numBytes) { + state->completionEvent->schedule(curTick + lat); + delete state; + delete pkt->req; + } + pendingCount--; + assert(pendingCount >= 0); + delete pkt; + + if (pendingCount == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } + } else panic("Unknown memory command state."); - */ } DmaDevice::~DmaDevice() diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index a2b61c7f4..fa3f98247 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -60,9 +60,9 @@ class PioPort : public Port /** The device that this port serves. */ PioDevice *device; - /** The platform that device/port are in. This is used to select which mode + /** The system that device/port are in. This is used to select which mode * we are currently operating in. */ - Platform *platform; + System *sys; /** A list of outgoing timing response packets that haven't been serviced * yet. */ @@ -82,6 +82,8 @@ class PioPort : public Port virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + void resendNacked(Packet *pkt); + /** * This class is used to implemented sendTiming() with a delay. When a delay * is requested a new event is created. When the event time expires it @@ -104,16 +106,27 @@ class PioPort : public Port friend class PioPort; }; + /** Number of timing requests that are emulating the device timing before + * attempting to end up on the bus. + */ + int outTiming; + + /** If we need to drain, keep the drain event around until we're done + * here.*/ + Event *drainEvent; + /** Schedule a sendTiming() event to be called in the future. */ void sendTiming(Packet *pkt, Tick time) - { new PioPort::SendEvent(this, pkt, time); } + { outTiming++; new PioPort::SendEvent(this, pkt, time); } /** This function is notification that the device should attempt to send a * packet again. */ virtual void recvRetry(); public: - PioPort(PioDevice *dev, Platform *p); + PioPort(PioDevice *dev, System *s, std::string pname = "-pioport"); + + unsigned int drain(Event *de); friend class PioPort::SendEvent; }; @@ -145,13 +158,20 @@ class DmaPort : public Port DmaDevice *device; std::list<Packet*> transmitList; - /** The platform that device/port are in. This is used to select which mode + /** The system that device/port are in. This is used to select which mode * we are currently operating in. */ - Platform *platform; + System *sys; /** Number of outstanding packets the dma port has. */ int pendingCount; + /** If a dmaAction is in progress. */ + int actionInProgress; + + /** If we need to drain, keep the drain event around until we're done + * here.*/ + Event *drainEvent; + virtual bool recvTiming(Packet *pkt); virtual Tick recvAtomic(Packet *pkt) { panic("dma port shouldn't be used for pio access."); } @@ -169,13 +189,14 @@ class DmaPort : public Port void sendDma(Packet *pkt, bool front = false); public: - DmaPort(DmaDevice *dev, Platform *p); + DmaPort(DmaDevice *dev, System *s); void dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, uint8_t *data = NULL); bool dmaPending() { return pendingCount > 0; } + unsigned int drain(Event *de); }; /** @@ -194,6 +215,8 @@ class PioDevice : public MemObject * transaction we should perform. */ Platform *platform; + System *sys; + /** The pioPort that handles the requests for us and provides us requests * that it sees. */ PioPort *pioPort; @@ -238,20 +261,22 @@ class PioDevice : public MemObject const Params *params() const { return _params; } PioDevice(Params *p) - : MemObject(p->name), platform(p->platform), pioPort(NULL), - _params(p) + : MemObject(p->name), platform(p->platform), sys(p->system), + pioPort(NULL), _params(p) {} virtual ~PioDevice(); virtual void init(); + virtual unsigned int drain(Event *de); + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) panic("pio port already connected to."); - pioPort = new PioPort(this, params()->platform); + pioPort = new PioPort(this, sys); return pioPort; } else return NULL; @@ -308,17 +333,19 @@ class DmaDevice : public PioDevice bool dmaPending() { return dmaPort->dmaPending(); } + virtual unsigned int drain(Event *de); + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) panic("pio port already connected to."); - pioPort = new PioPort(this, params()->platform); + pioPort = new PioPort(this, sys); return pioPort; } else if (if_name == "dma") { if (dmaPort != NULL) panic("dma port already connected to."); - dmaPort = new DmaPort(this, params()->platform); + dmaPort = new DmaPort(this, sys); return dmaPort; } else return NULL; diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index 360fe8c9b..bf2279d93 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -465,11 +465,12 @@ NSGigE::regStats() /** * This is to write to the PCI general configuration registers */ -void -NSGigE::writeConfig(int offset, const uint16_t data) +Tick +NSGigE::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset < PCI_DEVICE_SPECIFIC) - PciDev::writeConfig(offset, data); + PciDev::writeConfig(pkt); else panic("Device specific PCI config space not implemented!\n"); @@ -484,6 +485,8 @@ NSGigE::writeConfig(int offset, const uint16_t data) ioEnable = false; break; } + pkt->result = Packet::Success; + return configDelay; } /** @@ -508,14 +511,7 @@ NSGigE::read(Packet *pkt) if (daddr > LAST && daddr <= RESERVED) { panic("Accessing reserved register"); } else if (daddr > RESERVED && daddr <= 0x3FC) { - if (pkt->getSize() == sizeof(uint8_t)) - readConfig(daddr & 0xff, pkt->getPtr<uint8_t>()); - if (pkt->getSize() == sizeof(uint16_t)) - readConfig(daddr & 0xff, pkt->getPtr<uint16_t>()); - if (pkt->getSize() == sizeof(uint32_t)) - readConfig(daddr & 0xff, pkt->getPtr<uint32_t>()); - pkt->result = Packet::Success; - return pioDelay; + return readConfig(pkt); } else if (daddr >= MIB_START && daddr <= MIB_END) { // don't implement all the MIB's. hopefully the kernel // doesn't actually DEPEND upon their values @@ -733,14 +729,7 @@ NSGigE::write(Packet *pkt) if (daddr > LAST && daddr <= RESERVED) { panic("Accessing reserved register"); } else if (daddr > RESERVED && daddr <= 0x3FC) { - if (pkt->getSize() == sizeof(uint8_t)) - writeConfig(daddr & 0xff, pkt->get<uint8_t>()); - if (pkt->getSize() == sizeof(uint16_t)) - writeConfig(daddr & 0xff, pkt->get<uint16_t>()); - if (pkt->getSize() == sizeof(uint32_t)) - writeConfig(daddr & 0xff, pkt->get<uint32_t>()); - pkt->result = Packet::Success; - return pioDelay; + return writeConfig(pkt); } else if (daddr > 0x3FC) panic("Something is messed up!\n"); @@ -1388,7 +1377,7 @@ NSGigE::doRxDmaRead() assert(rxDmaState == dmaIdle || rxDmaState == dmaReadWaiting); rxDmaState = dmaReading; - if (dmaPending()) + if (dmaPending() || getState() != Running) rxDmaState = dmaReadWaiting; else dmaRead(rxDmaAddr, rxDmaLen, &rxDmaReadEvent, (uint8_t*)rxDmaData); @@ -1419,7 +1408,7 @@ NSGigE::doRxDmaWrite() assert(rxDmaState == dmaIdle || rxDmaState == dmaWriteWaiting); rxDmaState = dmaWriting; - if (dmaPending()) + if (dmaPending() || getState() != Running) rxDmaState = dmaWriteWaiting; else dmaWrite(rxDmaAddr, rxDmaLen, &rxDmaWriteEvent, (uint8_t*)rxDmaData); @@ -1837,7 +1826,7 @@ NSGigE::doTxDmaRead() assert(txDmaState == dmaIdle || txDmaState == dmaReadWaiting); txDmaState = dmaReading; - if (dmaPending()) + if (dmaPending() || getState() != Running) txDmaState = dmaReadWaiting; else dmaRead(txDmaAddr, txDmaLen, &txDmaReadEvent, (uint8_t*)txDmaData); @@ -1868,7 +1857,7 @@ NSGigE::doTxDmaWrite() assert(txDmaState == dmaIdle || txDmaState == dmaWriteWaiting); txDmaState = dmaWriting; - if (dmaPending()) + if (dmaPending() || getState() != Running) txDmaState = dmaWriteWaiting; else dmaWrite(txDmaAddr, txDmaLen, &txDmaWriteEvent, (uint8_t*)txDmaData); @@ -2417,6 +2406,20 @@ NSGigE::recvPacket(EthPacketPtr packet) return true; } + +void +NSGigE::resume() +{ + SimObject::resume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + + //===================================================================== // // @@ -2807,7 +2810,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(NSGigE) SimObjectParam<System *> system; SimObjectParam<Platform *> platform; - SimObjectParam<PciConfigAll *> configspace; SimObjectParam<PciConfigData *> configdata; Param<uint32_t> pci_bus; Param<uint32_t> pci_dev; @@ -2841,7 +2843,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(NSGigE) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -2879,7 +2880,6 @@ CREATE_SIM_OBJECT(NSGigE) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh index 2f47026f3..080c0b1f3 100644 --- a/src/dev/ns_gige.hh +++ b/src/dev/ns_gige.hh @@ -114,7 +114,6 @@ struct dp_rom { class NSGigEInt; class Packet; -class PciConfigAll; /** * NS DP83820 Ethernet device model @@ -376,7 +375,7 @@ class NSGigE : public PciDev ~NSGigE(); const Params *params() const { return (const Params *)_params; } - virtual void writeConfig(int offset, const uint16_t data); + virtual Tick writeConfig(Packet *pkt); virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); @@ -392,6 +391,8 @@ class NSGigE : public PciDev virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + virtual void resume(); + public: void regStats(); diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc index 785774ff4..68013eab8 100644 --- a/src/dev/pciconfigall.cc +++ b/src/dev/pciconfigall.cc @@ -33,14 +33,8 @@ * PCI Configspace implementation */ -#include <deque> -#include <string> -#include <vector> -#include <bitset> - #include "base/trace.hh" #include "dev/pciconfigall.hh" -#include "dev/pcidev.hh" #include "dev/pcireg.h" #include "dev/platform.hh" #include "mem/packet.hh" @@ -50,151 +44,61 @@ using namespace std; PciConfigAll::PciConfigAll(Params *p) - : BasicPioDevice(p) + : PioDevice(p) { - pioSize = 0xffffff; - - // Set backpointer for pci config. Really the config stuff should be able to - // automagically do this - p->platform->pciconfig = this; - - // Make all the pointers to devices null - for(int x=0; x < MAX_PCI_DEV; x++) - for(int y=0; y < MAX_PCI_FUNC; y++) - devices[x][y] = NULL; + pioAddr = p->platform->calcConfigAddr(params()->bus,0,0); } -// If two interrupts share the same line largely bad things will happen. -// Since we don't track how many times an interrupt was set and correspondingly -// cleared two devices on the same interrupt line and assert and deassert each -// others interrupt "line". Interrupts will not work correctly. -void -PciConfigAll::startup() -{ - bitset<256> intLines; - PciDev *tempDev; - uint8_t intline; - - for (int x = 0; x < MAX_PCI_DEV; x++) { - for (int y = 0; y < MAX_PCI_FUNC; y++) { - if (devices[x][y] != NULL) { - tempDev = devices[x][y]; - intline = tempDev->interruptLine(); - if (intLines.test(intline)) - warn("Interrupt line %#X is used multiple times" - "(You probably want to fix this).\n", (uint32_t)intline); - else - intLines.set(intline); - } // devices != NULL - } // PCI_FUNC - } // PCI_DEV - -} Tick PciConfigAll::read(Packet *pkt) { assert(pkt->result == Packet::Unknown); - assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); - - Addr daddr = pkt->getAddr() - pioAddr; - int device = (daddr >> 11) & 0x1F; - int func = (daddr >> 8) & 0x7; - int reg = daddr & 0xFF; pkt->allocate(); - DPRINTF(PciConfigAll, "read va=%#x da=%#x size=%d\n", pkt->getAddr(), daddr, + DPRINTF(PciConfigAll, "read va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); switch (pkt->getSize()) { case sizeof(uint32_t): - if (devices[device][func] == NULL) - pkt->set<uint32_t>(0xFFFFFFFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr<uint32_t>()); + pkt->set<uint32_t>(0xFFFFFFFF); break; case sizeof(uint16_t): - if (devices[device][func] == NULL) - pkt->set<uint16_t>(0xFFFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr<uint16_t>()); + pkt->set<uint16_t>(0xFFFF); break; case sizeof(uint8_t): - if (devices[device][func] == NULL) - pkt->set<uint8_t>(0xFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr<uint8_t>()); + pkt->set<uint8_t>(0xFF); break; default: panic("invalid access size(?) for PCI configspace!\n"); } pkt->result = Packet::Success; - return pioDelay; + return params()->pio_delay; } Tick PciConfigAll::write(Packet *pkt) { assert(pkt->result == Packet::Unknown); - assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); - assert(pkt->getSize() == sizeof(uint8_t) || pkt->getSize() == sizeof(uint16_t) || - pkt->getSize() == sizeof(uint32_t)); - Addr daddr = pkt->getAddr() - pioAddr; - - int device = (daddr >> 11) & 0x1F; - int func = (daddr >> 8) & 0x7; - int reg = daddr & 0xFF; - - if (devices[device][func] == NULL) - panic("Attempting to write to config space on non-existant device\n"); - - DPRINTF(PciConfigAll, "write - va=%#x size=%d data=%#x\n", - pkt->getAddr(), pkt->getSize(), pkt->get<uint32_t>()); - - switch (pkt->getSize()) { - case sizeof(uint8_t): - devices[device][func]->writeConfig(reg, pkt->get<uint8_t>()); - break; - case sizeof(uint16_t): - devices[device][func]->writeConfig(reg, pkt->get<uint16_t>()); - break; - case sizeof(uint32_t): - devices[device][func]->writeConfig(reg, pkt->get<uint32_t>()); - break; - default: - panic("invalid pci config write size\n"); - } - pkt->result = Packet::Success; - return pioDelay; + panic("Attempting to write to config space on non-existant device\n"); } void -PciConfigAll::serialize(std::ostream &os) +PciConfigAll::addressRanges(AddrRangeList &range_list) { - /* - * There is no state associated with this object that requires - * serialization. The only real state are the device pointers - * which are all setup by the constructor of the PciDev class - */ + range_list.clear(); + range_list.push_back(RangeSize(pioAddr, params()->size)); } -void -PciConfigAll::unserialize(Checkpoint *cp, const std::string §ion) -{ - /* - * There is no state associated with this object that requires - * serialization. The only real state are the device pointers - * which are all setup by the constructor of the PciDev class - */ -} #ifndef DOXYGEN_SHOULD_SKIP_THIS BEGIN_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll) - Param<Addr> pio_addr; Param<Tick> pio_latency; + Param<int> bus; + Param<Addr> size; SimObjectParam<Platform *> platform; SimObjectParam<System *> system; @@ -202,8 +106,9 @@ END_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll) BEGIN_INIT_SIM_OBJECT_PARAMS(PciConfigAll) - INIT_PARAM(pio_addr, "Device Address"), INIT_PARAM(pio_latency, "Programmed IO latency"), + INIT_PARAM(bus, "Bus that this object handles config space for"), + INIT_PARAM(size, "The size of config space"), INIT_PARAM(platform, "platform"), INIT_PARAM(system, "system object") @@ -211,11 +116,13 @@ END_INIT_SIM_OBJECT_PARAMS(PciConfigAll) CREATE_SIM_OBJECT(PciConfigAll) { - BasicPioDevice::Params *p = new BasicPioDevice::Params; - p->pio_addr = pio_addr; + PciConfigAll::Params *p = new PciConfigAll::Params; p->pio_delay = pio_latency; p->platform = platform; p->system = system; + p->bus = bus; + p->size = size; + return new PciConfigAll(p); } diff --git a/src/dev/pciconfigall.hh b/src/dev/pciconfigall.hh index e60fd949b..07eaf8112 100644 --- a/src/dev/pciconfigall.hh +++ b/src/dev/pciconfigall.hh @@ -42,11 +42,6 @@ #include "dev/io_device.hh" -static const uint32_t MAX_PCI_DEV = 32; -static const uint32_t MAX_PCI_FUNC = 8; - -class PciDev; - /** * PCI Config Space * All of PCI config space needs to return -1 on Tsunami, except @@ -54,16 +49,17 @@ class PciDev; * space and passes the requests on to TsunamiPCIDev devices as * appropriate. */ -class PciConfigAll : public BasicPioDevice +class PciConfigAll : public PioDevice { - private: - /** - * Pointers to all the devices that are registered with this - * particular config space. - */ - PciDev* devices[MAX_PCI_DEV][MAX_PCI_FUNC]; - public: + struct Params : public PioDevice::Params + { + Tick pio_delay; + Addr size; + int bus; + }; + const Params *params() const { return (const Params *)_params; } + /** * Constructor for PCIConfigAll * @param p parameters structure @@ -71,28 +67,10 @@ class PciConfigAll : public BasicPioDevice PciConfigAll(Params *p); /** - * Check if a device exists. - * @param pcidev PCI device to check - * @param pcifunc PCI function to check - * @return true if device exists, false otherwise - */ - bool deviceExists(uint32_t pcidev, uint32_t pcifunc) - { return devices[pcidev][pcifunc] != NULL ? true : false; } - - /** - * Registers a device with the config space object. - * @param pcidev PCI device to register - * @param pcifunc PCI function to register - * @param device device to register - */ - void registerDevice(uint8_t pcidev, uint8_t pcifunc, PciDev *device) - { devices[pcidev][pcifunc] = device; } - - /** * Read something in PCI config space. If the device does not exist * -1 is returned, if the device does exist its PciDev::ReadConfig (or the * virtual function that overrides) it is called. - * @param pkt Contains the address of the field to read. + * @param pkt Contains information about the read operation * @return Amount of time to do the read */ virtual Tick read(Packet *pkt); @@ -101,31 +79,17 @@ class PciConfigAll : public BasicPioDevice * Write to PCI config spcae. If the device does not exit the simulator * panics. If it does it is passed on the PciDev::WriteConfig (or the virtual * function that overrides it). - * @param req Contains the address to write to. - * @param data The data to write. - * @return The fault condition of the access. + * @param pkt Contains information about the write operation + * @return Amount of time to do the read */ virtual Tick write(Packet *pkt); - /** - * Start up function to check if more than one person is using an interrupt line - * and print a warning if such a case exists - */ - virtual void startup(); + void addressRanges(AddrRangeList &range_list); - /** - * Serialize this object to the given output stream. - * @param os The stream to serialize to. - */ - virtual void serialize(std::ostream &os); + private: + Addr pioAddr; - /** - * Reconstruct the state of this object from a checkpoint. - * @param cp The checkpoint use. - * @param section The section name of this object - */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); }; #endif // __PCICONFIGALL_HH__ diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index f8db2efbc..e81e0d1ee 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -53,201 +53,268 @@ using namespace std; -PciDev::PciDev(Params *p) - : DmaDevice(p), plat(p->platform), configData(p->configData), - pioDelay(p->pio_delay) -{ - // copy the config data from the PciConfigData object - if (configData) { - memcpy(config.data, configData->config.data, sizeof(config.data)); - memcpy(BARSize, configData->BARSize, sizeof(BARSize)); - memcpy(BARAddrs, configData->BARAddrs, sizeof(BARAddrs)); - } else - panic("NULL pointer to configuration data"); - // Setup pointer in config space to point to this entry - if (p->configSpace->deviceExists(p->deviceNum, p->functionNum)) - panic("Two PCI devices occuping same dev: %#x func: %#x", - p->deviceNum, p->functionNum); - else - p->configSpace->registerDevice(p->deviceNum, p->functionNum, this); -} - -void -PciDev::readConfig(int offset, uint8_t *data) +PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid, + int funcid, Platform *p) + : PioPort(dev,p->system,"-pciconf"), device(dev), platform(p), + busId(busid), deviceId(devid), functionId(funcid) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); + configAddr = platform->calcConfigAddr(busId, deviceId, functionId); +} - *data = config.data[offset]; - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); +Tick +PciDev::PciConfigPort::recvAtomic(Packet *pkt) +{ + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + return device->recvConfig(pkt); } void -PciDev::addressRanges(AddrRangeList &range_list) +PciDev::PciConfigPort::recvFunctional(Packet *pkt) { - int x = 0; - range_list.clear(); - for (x = 0; x < 6; x++) - if (BARAddrs[x] != 0) - range_list.push_back(RangeSize(BARAddrs[x],BARSize[x])); + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + device->recvConfig(pkt); } void -PciDev::readConfig(int offset, uint16_t *data) +PciDev::PciConfigPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); + snoop.clear(); + resp.push_back(RangeSize(configAddr, PCI_CONFIG_SIZE+1)); +} - *data = *(uint16_t*)&config.data[offset]; - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); +bool +PciDev::PciConfigPort::recvTiming(Packet *pkt) +{ + if (pkt->result == Packet::Nacked) { + resendNacked(pkt); + } else { + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + Tick latency = device->recvConfig(pkt); + // turn packet around to go back to requester + pkt->makeTimingResponse(); + sendTiming(pkt, latency); + } + return true; } -void -PciDev::readConfig(int offset, uint32_t *data) +PciDev::PciDev(Params *p) + : DmaDevice(p), plat(p->platform), configData(p->configData), + pioDelay(p->pio_delay), configDelay(p->config_delay), + configPort(NULL) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); + // copy the config data from the PciConfigData object + if (configData) { + memcpy(config.data, configData->config.data, sizeof(config.data)); + memcpy(BARSize, configData->BARSize, sizeof(BARSize)); + memcpy(BARAddrs, configData->BARAddrs, sizeof(BARAddrs)); + } else + panic("NULL pointer to configuration data"); - *data = *(uint32_t*)&config.data[offset]; + plat->registerPciDevice(0, p->deviceNum, p->functionNum, + letoh(configData->config.interruptLine)); +} - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); +void +PciDev::init() +{ + if (!configPort) + panic("pci config port not connected to anything!"); + configPort->sendStatusChange(Port::RangeChange); + PioDevice::init(); } +unsigned int +PciDev::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de) + dmaPort->drain(de) + configPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} -void -PciDev::writeConfig(int offset, const uint8_t data) +Tick +PciDev::readConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset >= PCI_DEVICE_SPECIFIC) panic("Device specific PCI config space not implemented!\n"); - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 1 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI0_INTERRUPT_LINE: - config.interruptLine = data; - case PCI_CACHE_LINE_SIZE: - config.cacheLineSize = data; - case PCI_LATENCY_TIMER: - config.latencyTimer = data; + pkt->allocate(); + + switch (pkt->getSize()) { + case sizeof(uint8_t): + pkt->set<uint8_t>(config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint8_t>()); break; - /* Do nothing for these read-only registers */ - case PCI0_INTERRUPT_PIN: - case PCI0_MINIMUM_GRANT: - case PCI0_MAXIMUM_LATENCY: - case PCI_CLASS_CODE: - case PCI_REVISION_ID: + case sizeof(uint16_t): + pkt->set<uint16_t>(*(uint16_t*)&config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint16_t>()); + break; + case sizeof(uint32_t): + pkt->set<uint32_t>(*(uint32_t*)&config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint32_t>()); break; default: - panic("writing to a read only register"); + panic("invalid access size(?) for PCI configspace!\n"); } + pkt->result = Packet::Success; + return configDelay; + } void -PciDev::writeConfig(int offset, const uint16_t data) +PciDev::addressRanges(AddrRangeList &range_list) { - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); - - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 2 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI_COMMAND: - config.command = data; - case PCI_STATUS: - config.status = data; - case PCI_CACHE_LINE_SIZE: - config.cacheLineSize = data; - break; - default: - panic("writing to a read only register"); - } + int x = 0; + range_list.clear(); + for (x = 0; x < 6; x++) + if (BARAddrs[x] != 0) + range_list.push_back(RangeSize(BARAddrs[x],BARSize[x])); } - -void -PciDev::writeConfig(int offset, const uint32_t data) +Tick +PciDev::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset >= PCI_DEVICE_SPECIFIC) panic("Device specific PCI config space not implemented!\n"); - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 4 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI0_BASE_ADDR0: - case PCI0_BASE_ADDR1: - case PCI0_BASE_ADDR2: - case PCI0_BASE_ADDR3: - case PCI0_BASE_ADDR4: - case PCI0_BASE_ADDR5: - - uint32_t barnum, bar_mask; - Addr base_addr, base_size, space_base; - - barnum = BAR_NUMBER(offset); - - if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) { - bar_mask = BAR_IO_MASK; - space_base = TSUNAMI_PCI0_IO; - } else { - bar_mask = BAR_MEM_MASK; - space_base = TSUNAMI_PCI0_MEMORY; + switch (pkt->getSize()) { + case sizeof(uint8_t): + switch (offset) { + case PCI0_INTERRUPT_LINE: + config.interruptLine = pkt->get<uint8_t>(); + case PCI_CACHE_LINE_SIZE: + config.cacheLineSize = pkt->get<uint8_t>(); + case PCI_LATENCY_TIMER: + config.latencyTimer = pkt->get<uint8_t>(); + break; + /* Do nothing for these read-only registers */ + case PCI0_INTERRUPT_PIN: + case PCI0_MINIMUM_GRANT: + case PCI0_MAXIMUM_LATENCY: + case PCI_CLASS_CODE: + case PCI_REVISION_ID: + break; + default: + panic("writing to a read only register"); } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint8_t>()); + break; + case sizeof(uint16_t): + switch (offset) { + case PCI_COMMAND: + config.command = pkt->get<uint8_t>(); + case PCI_STATUS: + config.status = pkt->get<uint8_t>(); + case PCI_CACHE_LINE_SIZE: + config.cacheLineSize = pkt->get<uint8_t>(); + break; + default: + panic("writing to a read only register"); + } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint16_t>()); + break; + case sizeof(uint32_t): + switch (offset) { + case PCI0_BASE_ADDR0: + case PCI0_BASE_ADDR1: + case PCI0_BASE_ADDR2: + case PCI0_BASE_ADDR3: + case PCI0_BASE_ADDR4: + case PCI0_BASE_ADDR5: + + uint32_t barnum, bar_mask; + Addr base_addr, base_size, space_base; + + barnum = BAR_NUMBER(offset); + + if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) { + bar_mask = BAR_IO_MASK; + space_base = TSUNAMI_PCI0_IO; + } else { + bar_mask = BAR_MEM_MASK; + space_base = TSUNAMI_PCI0_MEMORY; + } - // Writing 0xffffffff to a BAR tells the card to set the - // value of the bar to size of memory it needs - if (letoh(data) == 0xffffffff) { - // This is I/O Space, bottom two bits are read only - - config.baseAddr[barnum] = letoh( - (~(BARSize[barnum] - 1) & ~bar_mask) | + // Writing 0xffffffff to a BAR tells the card to set the + // value of the bar to size of memory it needs + if (letoh(pkt->get<uint32_t>()) == 0xffffffff) { + // This is I/O Space, bottom two bits are read only + + config.baseAddr[barnum] = letoh( + (~(BARSize[barnum] - 1) & ~bar_mask) | + (letoh(config.baseAddr[barnum]) & bar_mask)); + } else { + config.baseAddr[barnum] = letoh( + (letoh(pkt->get<uint32_t>()) & ~bar_mask) | (letoh(config.baseAddr[barnum]) & bar_mask)); - } else { - config.baseAddr[barnum] = letoh( - (letoh(data) & ~bar_mask) | - (letoh(config.baseAddr[barnum]) & bar_mask)); - if (letoh(config.baseAddr[barnum]) & ~bar_mask) { - base_addr = (letoh(data) & ~bar_mask) + space_base; - base_size = BARSize[barnum]; - BARAddrs[barnum] = base_addr; + if (letoh(config.baseAddr[barnum]) & ~bar_mask) { + base_addr = (letoh(pkt->get<uint32_t>()) & ~bar_mask) + space_base; + base_size = BARSize[barnum]; + BARAddrs[barnum] = base_addr; - pioPort->sendStatusChange(Port::RangeChange); + pioPort->sendStatusChange(Port::RangeChange); + } } + break; + + case PCI0_ROM_BASE_ADDR: + if (letoh(pkt->get<uint32_t>()) == 0xfffffffe) + config.expansionROM = htole((uint32_t)0xffffffff); + else + config.expansionROM = pkt->get<uint32_t>(); + break; + + case PCI_COMMAND: + // This could also clear some of the error bits in the Status + // register. However they should never get set, so lets ignore + // it for now + config.command = pkt->get<uint32_t>(); + break; + + default: + DPRINTF(PCIDEV, "Writing to a read only register"); } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get<uint32_t>()); break; - - case PCI0_ROM_BASE_ADDR: - if (letoh(data) == 0xfffffffe) - config.expansionROM = htole((uint32_t)0xffffffff); - else - config.expansionROM = data; - break; - - case PCI_COMMAND: - // This could also clear some of the error bits in the Status - // register. However they should never get set, so lets ignore - // it for now - config.command = data; - break; - default: - DPRINTF(PCIDEV, "Writing to a read only register"); + panic("invalid access size(?) for PCI configspace!\n"); } + pkt->result = Packet::Success; + return configDelay; + } void diff --git a/src/dev/pcidev.hh b/src/dev/pcidev.hh index 92786427b..847fb07d0 100644 --- a/src/dev/pcidev.hh +++ b/src/dev/pcidev.hh @@ -47,8 +47,6 @@ #define BAR_IO_SPACE(x) ((x) & BAR_IO_SPACE_BIT) #define BAR_NUMBER(x) (((x) - PCI0_BASE_ADDR0) >> 0x2); -class PciConfigAll; - /** * This class encapulates the first 64 bytes of a singles PCI @@ -78,24 +76,43 @@ class PciConfigData : public SimObject Addr BARAddrs[6]; }; + /** * PCI device, base implemnation is only config space. - * Each device is connected to a PCIConfigSpace device - * which returns -1 for everything but the pcidevs that - * register with it. This object registers with the PCIConfig space - * object. */ class PciDev : public DmaDevice { - public: - struct Params : public ::PioDevice::Params + class PciConfigPort : public PioPort { - /** - * A pointer to the configspace all object that calls us when - * a read comes to this particular device/function. - */ - PciConfigAll *configSpace; + protected: + PciDev *device; + + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt) ; + + virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + + Platform *platform; + + int busId; + int deviceId; + int functionId; + + Addr configAddr; + + public: + PciConfigPort(PciDev *dev, int busid, int devid, int funcid, + Platform *p); + friend class PioPort::SendEvent; + }; + + public: + struct Params : public PioDevice::Params + { /** * A pointer to the object that contains the first 64 bytes of * config space @@ -113,6 +130,9 @@ class PciDev : public DmaDevice /** The latency for pio accesses. */ Tick pio_delay; + + /** The latency for a config access. */ + Tick config_delay; }; public: @@ -164,6 +184,25 @@ class PciDev : public DmaDevice Platform *plat; PciConfigData *configData; Tick pioDelay; + Tick configDelay; + PciConfigPort *configPort; + + /** + * Write to the PCI config space data that is stored locally. This may be + * overridden by the device but at some point it will eventually call this + * for normal operations that it does not need to override. + * @param pkt packet containing the write the offset into config space + */ + virtual Tick writeConfig(Packet *pkt); + + + /** + * Read from the PCI config space data that is stored locally. This may be + * overridden by the device but at some point it will eventually call this + * for normal operations that it does not need to override. + * @param pkt packet containing the write the offset into config space + */ + virtual Tick readConfig(Packet *pkt); public: Addr pciToDma(Addr pciAddr) const @@ -171,21 +210,25 @@ class PciDev : public DmaDevice void intrPost() - { plat->postPciInt(configData->config.interruptLine); } + { plat->postPciInt(letoh(configData->config.interruptLine)); } void intrClear() - { plat->clearPciInt(configData->config.interruptLine); } + { plat->clearPciInt(letoh(configData->config.interruptLine)); } uint8_t interruptLine() - { return configData->config.interruptLine; } + { return letoh(configData->config.interruptLine); } /** return the address ranges that this device responds to. * @params range_list range list to populate with ranges */ void addressRanges(AddrRangeList &range_list); + /** Do a PCI Configspace memory access. */ + Tick recvConfig(Packet *pkt) + { return pkt->isRead() ? readConfig(pkt) : writeConfig(pkt); } + /** * Constructor for PCI Dev. This function copies data from the * config file object PCIConfigData and registers the device with @@ -193,30 +236,7 @@ class PciDev : public DmaDevice */ PciDev(Params *params); - /** - * Write to the PCI config space data that is stored locally. This may be - * overridden by the device but at some point it will eventually call this - * for normal operations that it does not need to override. - * @param offset the offset into config space - * @param size the size of the write - * @param data the data to write - */ - virtual void writeConfig(int offset, const uint8_t data); - virtual void writeConfig(int offset, const uint16_t data); - virtual void writeConfig(int offset, const uint32_t data); - - - /** - * Read from the PCI config space data that is stored locally. This may be - * overridden by the device but at some point it will eventually call this - * for normal operations that it does not need to override. - * @param offset the offset into config space - * @param size the size of the read - * @param data pointer to the location where the read value should be stored - */ - virtual void readConfig(int offset, uint8_t *data); - virtual void readConfig(int offset, uint16_t *data); - virtual void readConfig(int offset, uint32_t *data); + virtual void init(); /** * Serialize this object to the given output stream. @@ -230,5 +250,22 @@ class PciDev : public DmaDevice * @param section The section name of this object */ virtual void unserialize(Checkpoint *cp, const std::string §ion); + + + virtual unsigned int drain(Event *de); + + virtual Port *getPort(const std::string &if_name, int idx = -1) + { + if (if_name == "config") { + if (configPort != NULL) + panic("pciconfig port already connected to."); + configPort = new PciConfigPort(this, params()->busNum, + params()->deviceNum, params()->functionNum, + params()->platform); + return configPort; + } + return DmaDevice::getPort(if_name, idx); + } + }; #endif // __DEV_PCIDEV_HH__ diff --git a/src/dev/pcireg.h b/src/dev/pcireg.h index 0aa4ba8ef..a48abd4fa 100644 --- a/src/dev/pcireg.h +++ b/src/dev/pcireg.h @@ -142,6 +142,7 @@ union PCIConfig { // Device specific offsets #define PCI_DEVICE_SPECIFIC 0x40 // 192 bytes +#define PCI_CONFIG_SIZE 0xFF // Some Vendor IDs #define PCI_VENDOR_DEC 0x1011 diff --git a/src/dev/platform.cc b/src/dev/platform.cc index ed021e3b6..8546b7805 100644 --- a/src/dev/platform.cc +++ b/src/dev/platform.cc @@ -63,5 +63,21 @@ Platform::pciToDma(Addr pciAddr) const panic("No PCI dma support in platform."); } +void +Platform::registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, uint8_t intr) +{ + uint32_t bdf = bus << 16 | dev << 8 | func << 0; + if (pciDevices.find(bdf) != pciDevices.end()) + fatal("Two PCI devices have same bus:device:function\n"); + + if (intLines.test(intr)) + fatal("Two PCI devices have same interrupt line: %d\n", intr); + + pciDevices.insert(bdf); + + intLines.set(intr); +} + + DEFINE_SIM_OBJECT_CLASS_NAME("Platform", Platform) diff --git a/src/dev/platform.hh b/src/dev/platform.hh index 0e6f4ba4a..1940dcad6 100644 --- a/src/dev/platform.hh +++ b/src/dev/platform.hh @@ -37,6 +37,9 @@ #ifndef __DEV_PLATFORM_HH__ #define __DEV_PLATFORM_HH__ +#include <bitset> +#include <set> + #include "sim/sim_object.hh" #include "arch/isa_traits.hh" @@ -52,9 +55,6 @@ class Platform : public SimObject /** Pointer to the interrupt controller */ IntrControl *intrctrl; - /** Pointer to the PCI configuration space */ - PciConfigAll *pciconfig; - /** Pointer to the UART, set by the uart */ Uart *uart; @@ -64,13 +64,20 @@ class Platform : public SimObject public: Platform(const std::string &name, IntrControl *intctrl); virtual ~Platform(); - virtual void init() { if (pciconfig == NULL) panic("PCI Config not set"); } virtual void postConsoleInt() = 0; virtual void clearConsoleInt() = 0; virtual Tick intrFrequency() = 0; virtual void postPciInt(int line); virtual void clearPciInt(int line); virtual Addr pciToDma(Addr pciAddr) const; + virtual Addr calcConfigAddr(int bus, int dev, int func) = 0; + virtual void registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, + uint8_t intr); + + private: + std::bitset<256> intLines; + std::set<uint32_t> pciDevices; + }; #endif // __DEV_PLATFORM_HH__ diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc index a0223733b..815cecca5 100644 --- a/src/dev/sinic.cc +++ b/src/dev/sinic.cc @@ -37,7 +37,6 @@ #include "cpu/intr_control.hh" #include "dev/etherlink.hh" #include "dev/sinic.hh" -#include "dev/pciconfigall.hh" #include "mem/packet.hh" #include "sim/builder.hh" #include "sim/debug.hh" @@ -922,7 +921,7 @@ Device::rxKick() break; case rxBeginCopy: - if (dmaPending()) + if (dmaPending() || getState() != Running) goto exit; rxDmaAddr = params()->platform->pciToDma( @@ -1110,7 +1109,7 @@ Device::txKick() break; case txBeginCopy: - if (dmaPending()) + if (dmaPending() || getState() != Running) goto exit; txDmaAddr = params()->platform->pciToDma( @@ -1288,6 +1287,18 @@ Device::recvPacket(EthPacketPtr packet) return true; } +void +Device::resume() +{ + SimObject::resume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + //===================================================================== // // @@ -1623,7 +1634,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Device) SimObjectParam<System *> system; SimObjectParam<Platform *> platform; - SimObjectParam<PciConfigAll *> configspace; SimObjectParam<PciConfigData *> configdata; Param<uint32_t> pci_bus; Param<uint32_t> pci_dev; @@ -1666,7 +1676,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Device) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -1711,7 +1720,6 @@ CREATE_SIM_OBJECT(Device) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/sinic.hh b/src/dev/sinic.hh index f6c229039..eece4ba6b 100644 --- a/src/dev/sinic.hh +++ b/src/dev/sinic.hh @@ -266,6 +266,7 @@ class Device : public Base public: virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); + virtual void resume(); void prepareIO(int cpu, int index); void prepareRead(int cpu, int index); diff --git a/src/dev/tsunami.cc b/src/dev/tsunami.cc index c9e15581d..8e740a72f 100644 --- a/src/dev/tsunami.cc +++ b/src/dev/tsunami.cc @@ -95,6 +95,13 @@ Tsunami::pciToDma(Addr pciAddr) const return pchip->translatePciToDma(pciAddr); } + +Addr +Tsunami::calcConfigAddr(int bus, int dev, int func) +{ + return pchip->calcConfigAddr(bus, dev, func); +} + void Tsunami::serialize(std::ostream &os) { diff --git a/src/dev/tsunami.hh b/src/dev/tsunami.hh index 13fc4417c..8bb66e914 100644 --- a/src/dev/tsunami.hh +++ b/src/dev/tsunami.hh @@ -113,9 +113,15 @@ class Tsunami : public Platform */ virtual void clearPciInt(int line); + virtual Addr pciToDma(Addr pciAddr) const; /** + * Calculate the configuration address given a bus/dev/func. + */ + virtual Addr calcConfigAddr(int bus, int dev, int func); + + /** * Serialize this object to the given output stream. * @param os The stream to serialize to. */ diff --git a/src/dev/tsunami_pchip.cc b/src/dev/tsunami_pchip.cc index a376b908d..8a542b9b0 100644 --- a/src/dev/tsunami_pchip.cc +++ b/src/dev/tsunami_pchip.cc @@ -302,6 +302,17 @@ TsunamiPChip::translatePciToDma(Addr busAddr) // if no match was found, then return the original address return busAddr; } +Addr +TsunamiPChip::calcConfigAddr(int bus, int dev, int func) +{ + assert(func < 8); + assert(dev < 32); + assert(bus == 0); + + return TsunamiPciBus0Config | (func << 8) | (dev << 11); +} + + void TsunamiPChip::serialize(std::ostream &os) diff --git a/src/dev/tsunami_pchip.hh b/src/dev/tsunami_pchip.hh index 9f80f7d68..2c97a1fea 100644 --- a/src/dev/tsunami_pchip.hh +++ b/src/dev/tsunami_pchip.hh @@ -45,6 +45,9 @@ class TsunamiPChip : public BasicPioDevice { protected: + + static const Addr TsunamiPciBus0Config = ULL(0x801fe000000); + /** Pchip control register */ uint64_t pctl; @@ -80,6 +83,8 @@ class TsunamiPChip : public BasicPioDevice */ Addr translatePciToDma(Addr busAddr); + Addr calcConfigAddr(int bus, int dev, int func); + virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 19a3dc9e4..31271106b 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -33,6 +33,7 @@ */ +#include "base/misc.hh" #include "base/trace.hh" #include "mem/bus.hh" #include "sim/builder.hh" @@ -40,6 +41,14 @@ Port * Bus::getPort(const std::string &if_name, int idx) { + if (if_name == "default") + if (defaultPort == NULL) { + defaultPort = new BusPort(csprintf("%s-default",name()), this, + defaultId); + return defaultPort; + } else + fatal("Default port already set\n"); + // if_name ignored? forced to be empty? int id = interfaces.size(); BusPort *bp = new BusPort(csprintf("%s-p%d", name(), id), this, id); @@ -47,11 +56,12 @@ Bus::getPort(const std::string &if_name, int idx) return bp; } -/** Get the ranges of anyone that we are connected to. */ +/** Get the ranges of anyone other buses that we are connected to. */ void Bus::init() { std::vector<Port*>::iterator intIter; + for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++) (*intIter)->sendStatusChange(Port::RangeChange); } @@ -110,6 +120,7 @@ Bus::findPort(Addr addr, int id) int dest_id = -1; int i = 0; bool found = false; + AddrRangeIter iter; while (i < portList.size() && !found) { @@ -120,8 +131,18 @@ Bus::findPort(Addr addr, int id) } i++; } - if (dest_id == -1) + + // Check if this matches the default range + if (dest_id == -1) { + for (iter = defaultRange.begin(); iter != defaultRange.end(); iter++) { + if (*iter == addr) { + DPRINTF(Bus, " found addr 0x%llx on default\n", addr); + return defaultPort; + } + } panic("Unable to find destination for addr: %llx", addr); + } + // we shouldn't be sending this back to where it came from assert(dest_id != id); @@ -155,39 +176,52 @@ Bus::recvFunctional(Packet *pkt) void Bus::recvStatusChange(Port::Status status, int id) { + AddrRangeList ranges; + AddrRangeList snoops; + int x; + AddrRangeIter iter; + assert(status == Port::RangeChange && "The other statuses need to be implemented."); DPRINTF(BusAddrRanges, "received RangeChange from device id %d\n", id); - assert(id < interfaces.size() && id >= 0); - int x; - Port *port = interfaces[id]; - AddrRangeList ranges; - AddrRangeList snoops; - AddrRangeIter iter; - std::vector<DevMap>::iterator portIter; + if (id == defaultId) { + defaultRange.clear(); + defaultPort->getPeerAddressRanges(ranges, snoops); + assert(snoops.size() == 0); + for(iter = ranges.begin(); iter != ranges.end(); iter++) { + defaultRange.push_back(*iter); + DPRINTF(BusAddrRanges, "Adding range %llx - %llx for default\n", + iter->start, iter->end); + } + } else { - // Clean out any previously existent ids - for (portIter = portList.begin(); portIter != portList.end(); ) { - if (portIter->portId == id) - portIter = portList.erase(portIter); - else - portIter++; - } + assert((id < interfaces.size() && id >= 0) || id == -1); + Port *port = interfaces[id]; + std::vector<DevMap>::iterator portIter; + + // Clean out any previously existent ids + for (portIter = portList.begin(); portIter != portList.end(); ) { + if (portIter->portId == id) + portIter = portList.erase(portIter); + else + portIter++; + } - port->getPeerAddressRanges(ranges, snoops); + port->getPeerAddressRanges(ranges, snoops); - // not dealing with snooping yet either - assert(snoops.size() == 0); - for(iter = ranges.begin(); iter != ranges.end(); iter++) { - DevMap dm; - dm.portId = id; - dm.range = *iter; + // not dealing with snooping yet either + assert(snoops.size() == 0); + for(iter = ranges.begin(); iter != ranges.end(); iter++) { + DevMap dm; + dm.portId = id; + dm.range = *iter; - DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n", - dm.range.start, dm.range.end, id); - portList.push_back(dm); + DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n", + dm.range.start, dm.range.end, id); + portList.push_back(dm); + } } DPRINTF(MMU, "port list has %d entries\n", portList.size()); @@ -196,19 +230,47 @@ Bus::recvStatusChange(Port::Status status, int id) for (x = 0; x < interfaces.size(); x++) if (x != id) interfaces[x]->sendStatusChange(Port::RangeChange); + + if (id != defaultId && defaultPort) + defaultPort->sendStatusChange(Port::RangeChange); } void Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) { std::vector<DevMap>::iterator portIter; + AddrRangeIter dflt_iter; + bool subset; resp.clear(); snoop.clear(); DPRINTF(BusAddrRanges, "received address range request, returning:\n"); + + for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); + dflt_iter++) { + resp.push_back(*dflt_iter); + DPRINTF(BusAddrRanges, " -- %#llX : %#llX\n",dflt_iter->start, + dflt_iter->end); + } for (portIter = portList.begin(); portIter != portList.end(); portIter++) { - if (portIter->portId != id) { + subset = false; + for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); + dflt_iter++) { + if ((portIter->range.start < dflt_iter->start && + portIter->range.end >= dflt_iter->start) || + (portIter->range.start < dflt_iter->end && + portIter->range.end >= dflt_iter->end)) + fatal("Devices can not set ranges that itersect the default set\ + but are not a subset of the default set.\n"); + if (portIter->range.start >= dflt_iter->start && + portIter->range.end <= dflt_iter->end) { + subset = true; + DPRINTF(BusAddrRanges, " -- %#llX : %#llX is a SUBSET\n", + portIter->range.start, portIter->range.end); + } + } + if (portIter->portId != id && !subset) { resp.push_back(portIter->range); DPRINTF(BusAddrRanges, " -- %#llX : %#llX\n", portIter->range.start, portIter->range.end); diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 9c7054b94..3a2896886 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -51,19 +51,22 @@ class Bus : public MemObject /** a globally unique id for this bus. */ int busId; + static const int defaultId = -1; + struct DevMap { int portId; Range<Addr> range; }; std::vector<DevMap> portList; + AddrRangeList defaultRange; /** Function called by the port when the bus is recieving a Timing - transaction.*/ + transaction.*/ bool recvTiming(Packet *pkt); /** Function called by the port when the bus is recieving a Atomic - transaction.*/ + transaction.*/ Tick recvAtomic(Packet *pkt); /** Function called by the port when the bus is recieving a Functional @@ -159,6 +162,9 @@ class Bus : public MemObject * original send failed for whatever reason.*/ std::list<Port*> retryList; + /** Port that handles requests that don't match any of the interfaces.*/ + Port *defaultPort; + public: /** A function used to return the port associated with this bus object. */ @@ -167,7 +173,7 @@ class Bus : public MemObject virtual void init(); Bus(const std::string &n, int bus_id) - : MemObject(n), busId(bus_id) {} + : MemObject(n), busId(bus_id), defaultPort(NULL) {} }; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index aaaf1bdef..451da28e8 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -59,7 +59,7 @@ void BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) { - cache->getAddressRanges(resp, snoop); + cache->getAddressRanges(resp, snoop, isCpuSide); } int @@ -98,6 +98,56 @@ BaseCache::CachePort::clearBlocked() blocked = false; } +BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort) + : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort) +{ + this->setFlags(AutoDelete); + pkt = NULL; +} + +BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, Packet *_pkt) + : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort), pkt(_pkt) +{ + this->setFlags(AutoDelete); +} + +void +BaseCache::CacheEvent::process() +{ + if (!pkt) + { + if (!cachePort->isCpuSide) + { + pkt = cachePort->cache->getPacket(); + bool success = cachePort->sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? "succesful" : "unsuccesful"); + cachePort->cache->sendResult(pkt, success); + if (success && cachePort->cache->doMasterRequest()) + { + //Still more to issue, rerequest in 1 cycle + pkt = NULL; + this->schedule(curTick+1); + } + } + else + { + pkt = cachePort->cache->getCoherencePacket(); + cachePort->sendTiming(pkt); + } + return; + } + //Know the packet to send, no need to mark in service (must succed) + bool success = cachePort->sendTiming(pkt); + assert(success); +} + +const char * +BaseCache::CacheEvent::description() +{ + return "timing event\n"; +} + Port* BaseCache::getPort(const std::string &if_name, int idx) { @@ -107,7 +157,13 @@ BaseCache::getPort(const std::string &if_name, int idx) cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); return cpuSidePort; } - if (if_name == "functional") + else if (if_name == "functional") + { + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "cpu_side") { if(cpuSidePort == NULL) cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); @@ -124,6 +180,14 @@ BaseCache::getPort(const std::string &if_name, int idx) } void +BaseCache::init() +{ + if (!cpuSidePort || !memSidePort) + panic("Cache not hooked up on both sides\n"); + cpuSidePort->sendStatusChange(Port::RangeChange); +} + +void BaseCache::regStats() { Request temp_req((Addr) NULL, 4, 0); diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 2754fab5a..0d1bfdfdb 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -79,9 +79,9 @@ class BaseCache : public MemObject { class CachePort : public Port { + public: BaseCache *cache; - public: CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); protected: @@ -110,10 +110,11 @@ class BaseCache : public MemObject struct CacheEvent : public Event { - Packet *pkt; CachePort *cachePort; + Packet *pkt; - CacheEvent(Packet *pkt, CachePort *cachePort); + CacheEvent(CachePort *_cachePort); + CacheEvent(CachePort *_cachePort, Packet *_pkt); void process(); const char *description(); }; @@ -142,11 +143,37 @@ class BaseCache : public MemObject fatal("No implementation"); } - virtual void recvStatusChange(Port::Status status, bool isCpuSide) + void recvStatusChange(Port::Status status, bool isCpuSide) + { + if (status == Port::RangeChange) + { + if (!isCpuSide) + { + cpuSidePort->sendStatusChange(Port::RangeChange); + } + else + { + memSidePort->sendStatusChange(Port::RangeChange); + } + } + } + + virtual Packet *getPacket() { fatal("No implementation"); } + virtual Packet *getCoherencePacket() + { + fatal("No implementation"); + } + + virtual void sendResult(Packet* &pkt, bool success) + { + + fatal("No implementation"); + } + /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause @@ -303,6 +330,8 @@ class BaseCache : public MemObject memSidePort = NULL; } + virtual void init(); + /** * Query block size of a cache. * @return The block size @@ -388,7 +417,6 @@ class BaseCache : public MemObject if (!isBlockedForSnoop()) { memSidePort->clearBlocked(); } - } /** @@ -407,10 +435,13 @@ class BaseCache : public MemObject */ void setMasterRequest(RequestCause cause, Tick time) { + if (!doMasterRequest()) + { + BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort); + reqCpu->schedule(time); + } uint8_t flag = 1<<cause; masterRequests |= flag; - assert("Implement\n" && 0); -// mi->pktuest(time); } /** @@ -462,8 +493,10 @@ class BaseCache : public MemObject */ void respond(Packet *pkt, Tick time) { - assert("Implement\n" && 0); -// si->respond(pkt,time); + pkt->makeTimingResponse(); + pkt->result = Packet::Success; + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); } /** @@ -476,8 +509,10 @@ class BaseCache : public MemObject if (!pkt->req->isUncacheable()) { missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; } - assert("Implement\n" && 0); -// si->respond(pkt,time); + pkt->makeTimingResponse(); + pkt->result = Packet::Success; + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); } /** @@ -496,9 +531,18 @@ class BaseCache : public MemObject */ void rangeChange() {} - void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) + void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide) { - panic("Unimplimented\n"); + if (isCpuSide) + { + AddrRangeList dummy; + memSidePort->getPeerAddressRanges(resp, dummy); + } + else + { + //This is where snoops get updated + return; + } } }; diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 1243c9d9e..ec5b800a8 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -168,14 +168,14 @@ class Cache : public BaseCache * Selects a request to send on the bus. * @return The memory request to service. */ - Packet * getPacket(); + virtual Packet * getPacket(); /** * Was the request was sent successfully? * @param req The request. * @param success True if the request was sent successfully. */ - void sendResult(Packet * &pkt, bool success); + virtual void sendResult(Packet * &pkt, bool success); /** * Handles a response (cache line fill/write ack) from the bus. @@ -202,7 +202,7 @@ class Cache : public BaseCache * Selects a coherence message to forward to lower levels of the hierarchy. * @return The coherence message to forward. */ - Packet * getCoherenceReq(); + virtual Packet * getCoherencePacket(); /** * Snoops bus transactions to maintain coherence. @@ -242,17 +242,6 @@ class Cache : public BaseCache } /** - * Send a response to the slave interface. - * @param req The request being responded to. - * @param time The time the response is ready. - */ - void respond(Packet * &pkt, Tick time) - { - //si->respond(pkt,time); - cpuSidePort->sendAtomic(pkt); - } - - /** * Perform the access specified in the request and return the estimated * time of completion. This function can either update the hierarchy state * or just perform the access wherever the data is found depending on the diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index aae5cbf01..a447ae3d5 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -350,7 +350,7 @@ Cache<TagStore,Buffering,Coherence>::pseudoFill(MSHR *mshr) template<class TagStore, class Buffering, class Coherence> Packet * -Cache<TagStore,Buffering,Coherence>::getCoherenceReq() +Cache<TagStore,Buffering,Coherence>::getCoherencePacket() { return coherence->getPacket(); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index da0448ad3..4a3dc1062 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -352,7 +352,7 @@ MissQueue::setPrefetcher(BasePrefetcher *_prefetcher) MSHR* MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) { - MSHR* mshr = mq.allocate(pkt, size); + MSHR* mshr = mq.allocate(pkt, blkSize); mshr->order = order++; if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { // Mark this as a cache line fill @@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) MSHR* MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) { - MSHR* mshr = wb.allocate(pkt,pkt->getSize()); + MSHR* mshr = wb.allocate(pkt,blkSize); mshr->order = order++; //REMOVING COMPRESSION FOR NOW @@ -446,11 +446,11 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) /** * @todo Add write merging here. */ - mshr = allocateWrite(pkt, pkt->getSize(), time); + mshr = allocateWrite(pkt, blkSize, time); return; } - mshr = allocateMiss(pkt, size, time); + mshr = allocateMiss(pkt, blkSize, time); } MSHR* diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 1a85d3018..db2f40c56 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -57,6 +57,7 @@ void MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, Packet * &target) { + addr = _addr; if (target) { //Have a request, just use it diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 2b97ab0c1..534db0077 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -183,19 +183,19 @@ class Packet ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, - ReadResp = IsRead | IsResponse, - WriteResp = IsWrite | IsResponse, + ReadResp = IsRead | IsResponse | NeedsResponse, + WriteResp = IsWrite | IsResponse | NeedsResponse, Writeback = IsWrite | IsRequest, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, - SoftPFResp = IsRead | IsRequest | IsSWPrefetch | IsResponse, - HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse, + SoftPFResp = IsRead | IsResponse | IsSWPrefetch | NeedsResponse, + HardPFResp = IsRead | IsResponse | IsHWPrefetch | NeedsResponse, InvalidateReq = IsInvalidate | IsRequest, WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | NeedsResponse, - UpgradeResp = IsInvalidate | IsResponse, - ReadExReq = IsRead | IsInvalidate | NeedsResponse, - ReadExResp = IsRead | IsInvalidate | IsResponse + UpgradeReq = IsInvalidate | IsRequest | NeedsResponse, + UpgradeResp = IsInvalidate | IsResponse | NeedsResponse, + ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, + ReadExResp = IsRead | IsInvalidate | IsResponse | NeedsResponse }; /** Return the string name of the cmd field (for debugging and @@ -247,7 +247,7 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } - Addr getOffset(int blkSize) const { return req->getPaddr() & (Addr)(blkSize - 1); } + Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); } void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } void cmdOverride(Command newCmd) { cmd = newCmd; } @@ -311,8 +311,9 @@ class Packet * should not be called. */ void makeTimingResponse() { assert(needsResponse()); + assert(isRequest()); int icmd = (int)cmd; - icmd &= ~(IsRequest | NeedsResponse); + icmd &= ~(IsRequest); icmd |= IsResponse; cmd = (Command)icmd; dest = src; diff --git a/src/python/SConscript b/src/python/SConscript index 3a9def9a8..c9e713199 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -75,16 +75,27 @@ def addPkg(pkgdir): # build_env flags. def MakeDefinesPyFile(target, source, env): f = file(str(target[0]), 'w') - print >>f, "m5_build_env = ", - print >>f, source[0] + print >>f, "m5_build_env = ", source[0] f.close() optionDict = dict([(opt, env[opt]) for opt in env.ExportOptions]) env.Command('m5/defines.py', Value(optionDict), MakeDefinesPyFile) +def MakeInfoPyFile(target, source, env): + f = file(str(target[0]), 'w') + for src in source: + data = ''.join(file(src.srcnode().abspath, 'r').xreadlines()) + print >>f, "%s = %s" % (src, repr(data)) + f.close() + +env.Command('m5/info.py', + [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ], + MakeInfoPyFile) + # Now specify the packages & files for the zip archive. addPkg('m5') pyzip_files.append('m5/defines.py') +pyzip_files.append('m5/info.py') pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py')) env.Command(['swig/cc_main_wrap.cc', 'm5/cc_main.py'], diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 828165d15..3d0e3defa 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -27,14 +27,14 @@ # Authors: Nathan Binkert # Steve Reinhardt -import sys, os, time, atexit, optparse +import atexit, os, sys # import the SWIG-wrapped main C++ functions import cc_main # import a few SWIG-wrapped items (those that are likely to be used # directly by user scripts) completely into this module for # convenience -from cc_main import simulate, SimLoopExitEvent, setCheckpointDir +from cc_main import simulate, SimLoopExitEvent # import the m5 compile options import defines @@ -57,111 +57,6 @@ def AddToPath(path): # so place the new dir right after that. sys.path.insert(1, path) - -# The m5 module's pointer to the parsed options object -options = None - - -# User should call this function after calling parse_args() to pass -# parsed standard option values back into the m5 module for -# processing. -def setStandardOptions(_options): - # Set module global var - global options - options = _options - # tell C++ about output directory - cc_main.setOutputDir(options.outdir) - -# Callback to set trace flags. Not necessarily the best way to do -# things in the long run (particularly if we change how these global -# options are handled). -def setTraceFlags(option, opt_str, value, parser): - objects.Trace.flags = value - -def setTraceStart(option, opt_str, value, parser): - objects.Trace.start = value - -def setTraceFile(option, opt_str, value, parser): - objects.Trace.file = value - -def noPCSymbol(option, opt_str, value, parser): - objects.ExecutionTrace.pc_symbol = False - -def noPrintCycle(option, opt_str, value, parser): - objects.ExecutionTrace.print_cycle = False - -def noPrintOpclass(option, opt_str, value, parser): - objects.ExecutionTrace.print_opclass = False - -def noPrintThread(option, opt_str, value, parser): - objects.ExecutionTrace.print_thread = False - -def noPrintEA(option, opt_str, value, parser): - objects.ExecutionTrace.print_effaddr = False - -def noPrintData(option, opt_str, value, parser): - objects.ExecutionTrace.print_data = False - -def printFetchseq(option, opt_str, value, parser): - objects.ExecutionTrace.print_fetchseq = True - -def printCpseq(option, opt_str, value, parser): - objects.ExecutionTrace.print_cpseq = True - -def dumpOnExit(option, opt_str, value, parser): - objects.Trace.dump_on_exit = True - -def debugBreak(option, opt_str, value, parser): - objects.Debug.break_cycles = value - -def statsTextFile(option, opt_str, value, parser): - objects.Statistics.text_file = value - -# Standard optparse options. Need to be explicitly included by the -# user script when it calls optparse.OptionParser(). -standardOptions = [ - optparse.make_option("--outdir", type="string", default="."), - optparse.make_option("--traceflags", type="string", action="callback", - callback=setTraceFlags), - optparse.make_option("--tracestart", type="int", action="callback", - callback=setTraceStart), - optparse.make_option("--tracefile", type="string", action="callback", - callback=setTraceFile), - optparse.make_option("--nopcsymbol", - action="callback", callback=noPCSymbol, - help="Disable PC symbols in trace output"), - optparse.make_option("--noprintcycle", - action="callback", callback=noPrintCycle, - help="Don't print cycle numbers in trace output"), - optparse.make_option("--noprintopclass", - action="callback", callback=noPrintOpclass, - help="Don't print op class type in trace output"), - optparse.make_option("--noprintthread", - action="callback", callback=noPrintThread, - help="Don't print thread number in trace output"), - optparse.make_option("--noprinteffaddr", - action="callback", callback=noPrintEA, - help="Don't print effective address in trace output"), - optparse.make_option("--noprintdata", - action="callback", callback=noPrintData, - help="Don't print result data in trace output"), - optparse.make_option("--printfetchseq", - action="callback", callback=printFetchseq, - help="Print fetch sequence numbers in trace output"), - optparse.make_option("--printcpseq", - action="callback", callback=printCpseq, - help="Print correct path sequence numbers in trace output"), - optparse.make_option("--dumponexit", - action="callback", callback=dumpOnExit, - help="Dump trace buffer on exit"), - optparse.make_option("--debugbreak", type="int", metavar="CYCLE", - action="callback", callback=debugBreak, - help="Cycle to create a breakpoint"), - optparse.make_option("--statsfile", type="string", action="callback", - callback=statsTextFile, metavar="FILE", - help="Sets the output file for the statistics") - ] - # make a SmartDict out of the build options for our local use import smartdict build_env = smartdict.SmartDict() @@ -171,12 +66,13 @@ build_env.update(defines.m5_build_env) env = smartdict.SmartDict() env.update(os.environ) - # Function to provide to C++ so it can look up instances based on paths def resolveSimObject(name): obj = config.instanceDict[name] return obj.getCCObject() +from main import options, arguments, main + # The final hook to generate .ini files. Called from the user script # once the config is built. def instantiate(root): @@ -213,35 +109,50 @@ atexit.register(cc_main.doExitCleanup) # matter since most scripts will probably 'from m5.objects import *'. import objects -def doQuiesce(root): - quiesce = cc_main.createCountedQuiesce() - unready_objects = root.startQuiesce(quiesce, True) - # If we've got some objects that can't quiesce immediately, then simulate +# This loops until all objects have been fully drained. +def doDrain(root): + all_drained = drain(root) + while (not all_drained): + all_drained = drain(root) + +# Tries to drain all objects. Draining might not be completed unless +# all objects return that they are drained on the first call. This is +# because as objects drain they may cause other objects to no longer +# be drained. +def drain(root): + all_drained = False + drain_event = cc_main.createCountedDrain() + unready_objects = root.startDrain(drain_event, True) + # If we've got some objects that can't drain immediately, then simulate if unready_objects > 0: - quiesce.setCount(unready_objects) + drain_event.setCount(unready_objects) simulate() - cc_main.cleanupCountedQuiesce(quiesce) + else: + all_drained = True + cc_main.cleanupCountedDrain(drain_event) + return all_drained def resume(root): root.resume() -def checkpoint(root): +def checkpoint(root, dir): if not isinstance(root, objects.Root): raise TypeError, "Object is not a root object. Checkpoint must be called on a root object." - doQuiesce(root) + doDrain(root) print "Writing checkpoint" - cc_main.serializeAll() + cc_main.serializeAll(dir) resume(root) -def restoreCheckpoint(root): +def restoreCheckpoint(root, dir): print "Restoring from checkpoint" - cc_main.unserializeAll() + cc_main.unserializeAll(dir) + resume(root) def changeToAtomic(system): if not isinstance(system, objects.Root) and not isinstance(system, System): raise TypeError, "Object is not a root or system object. Checkpoint must be " "called on a root object." - doQuiesce(system) + doDrain(system) print "Changing memory mode to atomic" system.changeTiming(cc_main.SimObject.Atomic) resume(system) @@ -250,7 +161,7 @@ def changeToTiming(system): if not isinstance(system, objects.Root) and not isinstance(system, System): raise TypeError, "Object is not a root or system object. Checkpoint must be " "called on a root object." - doQuiesce(system) + doDrain(system) print "Changing memory mode to timing" system.changeTiming(cc_main.SimObject.Timing) resume(system) @@ -271,16 +182,16 @@ def switchCpus(cpuList): if not isinstance(cpu, objects.BaseCPU): raise TypeError, "%s is not of type BaseCPU", cpu - # Quiesce all of the individual CPUs - quiesce = cc_main.createCountedQuiesce() + # Drain all of the individual CPUs + drain_event = cc_main.createCountedDrain() unready_cpus = 0 for old_cpu in old_cpus: - unready_cpus += old_cpu.startQuiesce(quiesce, False) - # If we've got some objects that can't quiesce immediately, then simulate + unready_cpus += old_cpu.startDrain(drain_event, False) + # If we've got some objects that can't drain immediately, then simulate if unready_cpus > 0: - quiesce.setCount(unready_cpus) + drain_event.setCount(unready_cpus) simulate() - cc_main.cleanupCountedQuiesce(quiesce) + cc_main.cleanupCountedDrain(drain_event) # Now all of the CPUs are ready to be switched out for old_cpu in old_cpus: old_cpu._ccObject.switchOut() diff --git a/src/python/m5/attrdict.py b/src/python/m5/attrdict.py new file mode 100644 index 000000000..4ee7f1b8c --- /dev/null +++ b/src/python/m5/attrdict.py @@ -0,0 +1,61 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nathan Binkert + +__all__ = [ 'attrdict' ] + +class attrdict(dict): + def __getattr__(self, attr): + if attr in self: + return self.__getitem__(attr) + return super(attrdict, self).__getattribute__(attr) + + def __setattr__(self, attr, value): + if attr in dir(self): + return super(attrdict, self).__setattr__(attr, value) + return self.__setitem__(attr, value) + + def __delattr__(self, attr): + if attr in self: + return self.__delitem__(attr) + return super(attrdict, self).__delattr__(attr, value) + +if __name__ == '__main__': + x = attrdict() + x.y = 1 + x['z'] = 2 + print x['y'], x.y + print x['z'], x.z + print dir(x) + print x + + print + + del x['y'] + del x.z + print dir(x) + print(x) diff --git a/src/python/m5/config.py b/src/python/m5/config.py index 6f2873d40..df4b74cbd 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -543,15 +543,14 @@ class SimObject(object): for child in self._children.itervalues(): child.connectPorts() - def startQuiesce(self, quiesce_event, recursive): + def startDrain(self, drain_event, recursive): count = 0 # ParamContexts don't serialize if isinstance(self, SimObject) and not isinstance(self, ParamContext): - if self._ccObject.quiesce(quiesce_event): - count = 1 + count += self._ccObject.drain(drain_event) if recursive: for child in self._children.itervalues(): - count += child.startQuiesce(quiesce_event, True) + count += child.startDrain(drain_event, True) return count def resume(self): @@ -561,7 +560,7 @@ class SimObject(object): child.resume() def changeTiming(self, mode): - if isinstance(self, SimObject) and not isinstance(self, ParamContext): + if isinstance(self, System): self._ccObject.setMemoryMode(mode) for child in self._children.itervalues(): child.changeTiming(mode) @@ -666,7 +665,8 @@ class BaseProxy(object): result, done = self.find(obj) if not done: - raise AttributeError, "Can't resolve proxy '%s' from '%s'" % \ + raise AttributeError, \ + "Can't resolve proxy '%s' from '%s'" % \ (self.path(), base.path()) if isinstance(result, BaseProxy): diff --git a/src/python/m5/main.py b/src/python/m5/main.py new file mode 100644 index 000000000..a757aaf1a --- /dev/null +++ b/src/python/m5/main.py @@ -0,0 +1,323 @@ +# Copyright (c) 2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nathan Binkert + +import code, optparse, os, socket, sys +from datetime import datetime +from attrdict import attrdict + +try: + import info +except ImportError: + info = None + +__all__ = [ 'options', 'arguments', 'main' ] + +usage="%prog [m5 options] script.py [script options]" +version="%prog 2.0" +brief_copyright=''' +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved +''' + +# there's only one option parsing done, so make it global and add some +# helper functions to make it work well. +parser = optparse.OptionParser(usage=usage, version=version, + description=brief_copyright, + formatter=optparse.TitledHelpFormatter()) +parser.disable_interspersed_args() + +# current option group +group = None + +def set_group(*args, **kwargs): + '''set the current option group''' + global group + if not args and not kwargs: + group = None + else: + group = parser.add_option_group(*args, **kwargs) + +class splitter(object): + def __init__(self, split): + self.split = split + def __call__(self, option, opt_str, value, parser): + getattr(parser.values, option.dest).extend(value.split(self.split)) + +def add_option(*args, **kwargs): + '''add an option to the current option group, or global none set''' + + # if action=split, but allows the option arguments + # themselves to be lists separated by the split variable''' + + if kwargs.get('action', None) == 'append' and 'split' in kwargs: + split = kwargs.pop('split') + kwargs['default'] = [] + kwargs['type'] = 'string' + kwargs['action'] = 'callback' + kwargs['callback'] = splitter(split) + + if group: + return group.add_option(*args, **kwargs) + + return parser.add_option(*args, **kwargs) + +def bool_option(name, default, help): + '''add a boolean option called --name and --no-name. + Display help depending on which is the default''' + + tname = '--%s' % name + fname = '--no-%s' % name + dest = name.replace('-', '_') + if default: + thelp = optparse.SUPPRESS_HELP + fhelp = help + else: + thelp = help + fhelp = optparse.SUPPRESS_HELP + + add_option(tname, action="store_true", default=default, help=thelp) + add_option(fname, action="store_false", dest=dest, help=fhelp) + +# Help options +add_option('-A', "--authors", action="store_true", default=False, + help="Show author information") +add_option('-C', "--copyright", action="store_true", default=False, + help="Show full copyright information") +add_option('-R', "--readme", action="store_true", default=False, + help="Show the readme") +add_option('-N', "--release-notes", action="store_true", default=False, + help="Show the release notes") + +# Options for configuring the base simulator +add_option('-d', "--outdir", metavar="DIR", default=".", + help="Set the output directory to DIR [Default: %default]") +add_option('-i', "--interactive", action="store_true", default=False, + help="Invoke the interactive interpreter after running the script") +add_option("--pdb", action="store_true", default=False, + help="Invoke the python debugger before running the script") +add_option('-p', "--path", metavar="PATH[:PATH]", action='append', split=':', + help="Prepend PATH to the system path when invoking the script") +add_option('-q', "--quiet", action="count", default=0, + help="Reduce verbosity") +add_option('-v', "--verbose", action="count", default=0, + help="Increase verbosity") + +# Statistics options +set_group("Statistics Options") +add_option("--stats-file", metavar="FILE", default="m5stats.txt", + help="Sets the output file for statistics [Default: %default]") + +# Debugging options +set_group("Debugging Options") +add_option("--debug-break", metavar="TIME[,TIME]", action='append', split=',', + help="Cycle to create a breakpoint") + +# Tracing options +set_group("Trace Options") +add_option("--trace-flags", metavar="FLAG[,FLAG]", action='append', split=',', + help="Sets the flags for tracing") +add_option("--trace-start", metavar="TIME", default='0s', + help="Start tracing at TIME (must have units)") +add_option("--trace-file", metavar="FILE", default="cout", + help="Sets the output file for tracing [Default: %default]") +add_option("--trace-circlebuf", metavar="SIZE", type="int", default=0, + help="If SIZE is non-zero, turn on the circular buffer with SIZE lines") +add_option("--no-trace-circlebuf", action="store_const", const=0, + dest='trace_circlebuf', help=optparse.SUPPRESS_HELP) +bool_option("trace-dumponexit", default=False, + help="Dump trace buffer on exit") +add_option("--trace-ignore", metavar="EXPR", action='append', split=':', + help="Ignore EXPR sim objects") + +# Execution Trace options +set_group("Execution Trace Options") +bool_option("speculative", default=True, + help="Don't capture speculative instructions") +bool_option("print-cycle", default=True, + help="Don't print cycle numbers in trace output") +bool_option("print-symbol", default=True, + help="Disable PC symbols in trace output") +bool_option("print-opclass", default=True, + help="Don't print op class type in trace output") +bool_option("print-thread", default=True, + help="Don't print thread number in trace output") +bool_option("print-effaddr", default=True, + help="Don't print effective address in trace output") +bool_option("print-data", default=True, + help="Don't print result data in trace output") +bool_option("print-iregs", default=False, + help="Print fetch sequence numbers in trace output") +bool_option("print-fetch-seq", default=False, + help="Print fetch sequence numbers in trace output") +bool_option("print-cpseq", default=False, + help="Print correct path sequence numbers in trace output") + +options = attrdict() +arguments = [] + +def usage(exitcode=None): + parser.print_help() + if exitcode is not None: + sys.exit(exitcode) + +def parse_args(): + _opts,args = parser.parse_args() + opts = attrdict(_opts.__dict__) + + # setting verbose and quiet at the same time doesn't make sense + if opts.verbose > 0 and opts.quiet > 0: + usage(2) + + # store the verbosity in a single variable. 0 is default, + # negative numbers represent quiet and positive values indicate verbose + opts.verbose -= opts.quiet + + del opts.quiet + + options.update(opts) + arguments.extend(args) + return opts,args + +def main(): + import cc_main + + parse_args() + + done = False + if options.copyright: + done = True + print info.LICENSE + print + + if options.authors: + done = True + print 'Author information:' + print + print info.AUTHORS + print + + if options.readme: + done = True + print 'Readme:' + print + print info.README + print + + if options.release_notes: + done = True + print 'Release Notes:' + print + print info.RELEASE_NOTES + print + + if done: + sys.exit(0) + + if options.verbose >= 0: + print "M5 Simulator System" + print brief_copyright + print + print "M5 compiled %s" % cc_main.cvar.compileDate; + print "M5 started %s" % datetime.now().ctime() + print "M5 executing on %s" % socket.gethostname() + + # check to make sure we can find the listed script + if not arguments or not os.path.isfile(arguments[0]): + if arguments and not os.path.isfile(arguments[0]): + print "Script %s not found" % arguments[0] + usage(2) + + # tell C++ about output directory + cc_main.setOutputDir(options.outdir) + + # update the system path with elements from the -p option + sys.path[0:0] = options.path + + import objects + + # set stats options + objects.Statistics.text_file = options.stats_file + + # set debugging options + objects.Debug.break_cycles = options.debug_break + + # set tracing options + objects.Trace.flags = options.trace_flags + objects.Trace.start = options.trace_start + objects.Trace.file = options.trace_file + objects.Trace.bufsize = options.trace_circlebuf + objects.Trace.dump_on_exit = options.trace_dumponexit + objects.Trace.ignore = options.trace_ignore + + # set execution trace options + objects.ExecutionTrace.speculative = options.speculative + objects.ExecutionTrace.print_cycle = options.print_cycle + objects.ExecutionTrace.pc_symbol = options.print_symbol + objects.ExecutionTrace.print_opclass = options.print_opclass + objects.ExecutionTrace.print_thread = options.print_thread + objects.ExecutionTrace.print_effaddr = options.print_effaddr + objects.ExecutionTrace.print_data = options.print_data + objects.ExecutionTrace.print_iregs = options.print_iregs + objects.ExecutionTrace.print_fetchseq = options.print_fetch_seq + objects.ExecutionTrace.print_cpseq = options.print_cpseq + + sys.argv = arguments + sys.path = [ os.path.dirname(sys.argv[0]) ] + sys.path + + scope = { '__file__' : sys.argv[0] } + + # we want readline if we're doing anything interactive + if options.interactive or options.pdb: + exec("import readline", scope) + + # if pdb was requested, execfile the thing under pdb, otherwise, + # just do the execfile normally + if options.pdb: + from pdb import Pdb + debugger = Pdb() + debugger.run('execfile("%s")' % sys.argv[0], scope) + else: + execfile(sys.argv[0], scope) + + # once the script is done + if options.interactive: + interact = code.InteractiveConsole(scope) + interact.interact("M5 Interactive Console") + +if __name__ == '__main__': + from pprint import pprint + + parse_args() + + print 'opts:' + pprint(options, indent=4) + print + + print 'args:' + pprint(arguments, indent=4) diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py index 2e78578df..5bf98be9c 100644 --- a/src/python/m5/objects/BaseCPU.py +++ b/src/python/m5/objects/BaseCPU.py @@ -6,10 +6,10 @@ class BaseCPU(SimObject): abstract = True mem = Param.MemObject("memory") + system = Param.System(Parent.any, "system object") if build_env['FULL_SYSTEM']: dtb = Param.AlphaDTB("Data TLB") itb = Param.AlphaITB("Instruction TLB") - system = Param.System(Parent.any, "system object") cpu_id = Param.Int(-1, "CPU identifier") else: workload = VectorParam.Process("processes to run") diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py index 019e15034..e0278e6c3 100644 --- a/src/python/m5/objects/Bus.py +++ b/src/python/m5/objects/Bus.py @@ -4,4 +4,5 @@ from MemObject import MemObject class Bus(MemObject): type = 'Bus' port = VectorPort("vector port for connecting devices") + default = Port("Default port for requests that aren't handeled by a device.") bus_id = Param.Int(0, "blah") diff --git a/src/python/m5/objects/Device.py b/src/python/m5/objects/Device.py index 222f750da..f72c8e73f 100644 --- a/src/python/m5/objects/Device.py +++ b/src/python/m5/objects/Device.py @@ -12,7 +12,7 @@ class BasicPioDevice(PioDevice): type = 'BasicPioDevice' abstract = True pio_addr = Param.Addr("Device Address") - pio_latency = Param.Tick(1, "Programmed IO latency in simticks") + pio_latency = Param.Latency('1ns', "Programmed IO latency in simticks") class DmaDevice(PioDevice): type = 'DmaDevice' diff --git a/src/python/m5/objects/DiskImage.py b/src/python/m5/objects/DiskImage.py index 70d8b2e45..a98b35a4f 100644 --- a/src/python/m5/objects/DiskImage.py +++ b/src/python/m5/objects/DiskImage.py @@ -10,6 +10,6 @@ class RawDiskImage(DiskImage): class CowDiskImage(DiskImage): type = 'CowDiskImage' - child = Param.DiskImage("child image") + child = Param.DiskImage(RawDiskImage(read_only=True), + "child image") table_size = Param.Int(65536, "initial table size") - image_file = '' diff --git a/src/python/m5/objects/Ethernet.py b/src/python/m5/objects/Ethernet.py index 418670592..db7efe004 100644 --- a/src/python/m5/objects/Ethernet.py +++ b/src/python/m5/objects/Ethernet.py @@ -1,7 +1,7 @@ from m5 import build_env from m5.config import * from Device import DmaDevice -from Pci import PciDevice +from Pci import PciDevice, PciConfigData class EtherInt(SimObject): type = 'EtherInt' @@ -84,6 +84,26 @@ class EtherDevBase(PciDevice): tx_thread = Param.Bool(False, "dedicated kernel threads for receive") rss = Param.Bool(False, "Receive Side Scaling") +class NSGigEPciData(PciConfigData): + VendorID = 0x100B + DeviceID = 0x0022 + Status = 0x0290 + SubClassCode = 0x00 + ClassCode = 0x02 + ProgIF = 0x00 + BAR0 = 0x00000001 + BAR1 = 0x00000000 + BAR2 = 0x00000000 + BAR3 = 0x00000000 + BAR4 = 0x00000000 + BAR5 = 0x00000000 + MaximumLatency = 0x34 + MinimumGrant = 0xb0 + InterruptLine = 0x1e + InterruptPin = 0x01 + BAR0Size = '256B' + BAR1Size = '4kB' + class NSGigE(EtherDevBase): type = 'NSGigE' @@ -91,11 +111,32 @@ class NSGigE(EtherDevBase): dma_desc_free = Param.Bool(False, "DMA of Descriptors is free") dma_no_allocate = Param.Bool(True, "Should we allocate cache on read") + configdata = NSGigEPciData() + class NSGigEInt(EtherInt): type = 'NSGigEInt' device = Param.NSGigE("Ethernet device of this interface") +class SinicPciData(PciConfigData): + VendorID = 0x1291 + DeviceID = 0x1293 + Status = 0x0290 + SubClassCode = 0x00 + ClassCode = 0x02 + ProgIF = 0x00 + BAR0 = 0x00000000 + BAR1 = 0x00000000 + BAR2 = 0x00000000 + BAR3 = 0x00000000 + BAR4 = 0x00000000 + BAR5 = 0x00000000 + MaximumLatency = 0x34 + MinimumGrant = 0xb0 + InterruptLine = 0x1e + InterruptPin = 0x01 + BAR0Size = '64kB' + class Sinic(EtherDevBase): type = 'Sinic' @@ -111,6 +152,8 @@ class Sinic(EtherDevBase): delay_copy = Param.Bool(False, "Delayed copy transmit") virtual_addr = Param.Bool(False, "Virtual addressing") + configdata = SinicPciData() + class SinicInt(EtherInt): type = 'SinicInt' device = Param.Sinic("Ethernet device of this interface") diff --git a/src/python/m5/objects/Ide.py b/src/python/m5/objects/Ide.py index 9ee578177..a5fe1b595 100644 --- a/src/python/m5/objects/Ide.py +++ b/src/python/m5/objects/Ide.py @@ -1,8 +1,31 @@ from m5.config import * -from Pci import PciDevice +from Pci import PciDevice, PciConfigData class IdeID(Enum): vals = ['master', 'slave'] +class IdeControllerPciData(PciConfigData): + VendorID = 0x8086 + DeviceID = 0x7111 + Command = 0x0 + Status = 0x280 + Revision = 0x0 + ClassCode = 0x01 + SubClassCode = 0x01 + ProgIF = 0x85 + BAR0 = 0x00000001 + BAR1 = 0x00000001 + BAR2 = 0x00000001 + BAR3 = 0x00000001 + BAR4 = 0x00000001 + BAR5 = 0x00000001 + InterruptLine = 0x1f + InterruptPin = 0x01 + BAR0Size = '8B' + BAR1Size = '4B' + BAR2Size = '8B' + BAR3Size = '4B' + BAR4Size = '16B' + class IdeDisk(SimObject): type = 'IdeDisk' delay = Param.Latency('1us', "Fixed disk delay in microseconds") @@ -12,3 +35,5 @@ class IdeDisk(SimObject): class IdeController(PciDevice): type = 'IdeController' disks = VectorParam.IdeDisk("IDE disks attached to this controller") + + configdata =IdeControllerPciData() diff --git a/src/python/m5/objects/O3CPU.py b/src/python/m5/objects/O3CPU.py index 4ecfa8fbd..d6bc454ad 100644 --- a/src/python/m5/objects/O3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -10,6 +10,8 @@ class DerivO3CPU(BaseCPU): checker = Param.BaseCPU(NULL, "checker") cachePorts = Param.Unsigned("Cache Ports") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") renameToFetchDelay = Param.Unsigned("Rename to fetch delay") @@ -37,12 +39,10 @@ class DerivO3CPU(BaseCPU): "Issue/Execute/Writeback delay") issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " "to the IEW stage)") + dispatchWidth = Param.Unsigned("Dispatch width") issueWidth = Param.Unsigned("Issue width") - executeWidth = Param.Unsigned("Execute width") - executeIntWidth = Param.Unsigned("Integer execute width") - executeFloatWidth = Param.Unsigned("Floating point execute width") - executeBranchWidth = Param.Unsigned("Branch execute width") - executeMemoryWidth = Param.Unsigned("Memory execute width") + wbWidth = Param.Unsigned("Writeback width") + wbDepth = Param.Unsigned("Writeback depth") fuPool = Param.FUPool(NULL, "Functional Unit pool") iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " @@ -53,6 +53,9 @@ class DerivO3CPU(BaseCPU): trapLatency = Param.Tick("Trap latency") fetchTrapLatency = Param.Tick("Fetch trap latency") + backComSize = Param.Unsigned("Time buffer size for backwards communication") + forwardComSize = Param.Unsigned("Time buffer size for forward communication") + predType = Param.String("Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") diff --git a/src/python/m5/objects/OzoneCPU.py b/src/python/m5/objects/OzoneCPU.py index 8aff89203..88fb63c74 100644 --- a/src/python/m5/objects/OzoneCPU.py +++ b/src/python/m5/objects/OzoneCPU.py @@ -9,6 +9,9 @@ class DerivOzoneCPU(BaseCPU): checker = Param.BaseCPU("Checker CPU") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") + width = Param.Unsigned("Width") frontEndWidth = Param.Unsigned("Front end width") backEndWidth = Param.Unsigned("Back end width") diff --git a/src/python/m5/objects/Pci.py b/src/python/m5/objects/Pci.py index 9e1e91b13..cc0d1cf4a 100644 --- a/src/python/m5/objects/Pci.py +++ b/src/python/m5/objects/Pci.py @@ -1,5 +1,5 @@ from m5.config import * -from Device import BasicPioDevice, DmaDevice +from Device import BasicPioDevice, DmaDevice, PioDevice class PciConfigData(SimObject): type = 'PciConfigData' @@ -38,18 +38,22 @@ class PciConfigData(SimObject): MaximumLatency = Param.UInt8(0x00, "Maximum Latency") MinimumGrant = Param.UInt8(0x00, "Minimum Grant") -class PciConfigAll(BasicPioDevice): +class PciConfigAll(PioDevice): type = 'PciConfigAll' + pio_latency = Param.Tick(1, "Programmed IO latency in simticks") + bus = Param.UInt8(0x00, "PCI bus to act as config space for") + size = Param.MemorySize32('16MB', "Size of config space") + class PciDevice(DmaDevice): type = 'PciDevice' abstract = True + config = Port("PCI configuration space port") pci_bus = Param.Int("PCI bus") pci_dev = Param.Int("PCI device number") pci_func = Param.Int("PCI function code") - pio_latency = Param.Tick(1, "Programmed IO latency in simticks") + pio_latency = Param.Latency('1ns', "Programmed IO latency in simticks") configdata = Param.PciConfigData(Parent.any, "PCI Config data") - configspace = Param.PciConfigAll(Parent.any, "PCI Configspace") class PciFake(PciDevice): type = 'PciFake' diff --git a/src/python/m5/objects/Root.py b/src/python/m5/objects/Root.py index 373475a7a..33dd22620 100644 --- a/src/python/m5/objects/Root.py +++ b/src/python/m5/objects/Root.py @@ -7,7 +7,7 @@ from Debug import Debug class Root(SimObject): type = 'Root' - clock = Param.RootClock('200MHz', "tick frequency") + clock = Param.RootClock('1THz', "tick frequency") max_tick = Param.Tick('0', "maximum simulation ticks (0 = infinite)") progress_interval = Param.Tick('0', "print a progress message every n ticks (0 = never)") diff --git a/src/python/m5/objects/System.py b/src/python/m5/objects/System.py index 9a1e1d690..386f39277 100644 --- a/src/python/m5/objects/System.py +++ b/src/python/m5/objects/System.py @@ -1,9 +1,12 @@ from m5 import build_env from m5.config import * +class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing'] + class System(SimObject): type = 'System' physmem = Param.PhysicalMemory(Parent.any, "phsyical memory") + mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in") if build_env['FULL_SYSTEM']: boot_cpu_frequency = Param.Frequency(Self.cpu[0].clock.frequency, "boot processor frequency") diff --git a/src/python/m5/objects/Tsunami.py b/src/python/m5/objects/Tsunami.py index 4613571d8..0b5ff9e7d 100644 --- a/src/python/m5/objects/Tsunami.py +++ b/src/python/m5/objects/Tsunami.py @@ -1,11 +1,10 @@ from m5.config import * from Device import BasicPioDevice from Platform import Platform - -class Tsunami(Platform): - type = 'Tsunami' -# pciconfig = Param.PciConfigAll("PCI configuration") - system = Param.System(Parent.any, "system") +from AlphaConsole import AlphaConsole +from Uart import Uart8250 +from Pci import PciConfigAll +from BadDevice import BadDevice class TsunamiCChip(BasicPioDevice): type = 'TsunamiCChip' @@ -25,3 +24,71 @@ class TsunamiIO(BasicPioDevice): class TsunamiPChip(BasicPioDevice): type = 'TsunamiPChip' tsunami = Param.Tsunami(Parent.any, "Tsunami") + +class Tsunami(Platform): + type = 'Tsunami' + system = Param.System(Parent.any, "system") + + cchip = TsunamiCChip(pio_addr=0x801a0000000) + pchip = TsunamiPChip(pio_addr=0x80180000000) + pciconfig = PciConfigAll() + fake_sm_chip = IsaFake(pio_addr=0x801fc000370) + + fake_uart1 = IsaFake(pio_addr=0x801fc0002f8) + fake_uart2 = IsaFake(pio_addr=0x801fc0003e8) + fake_uart3 = IsaFake(pio_addr=0x801fc0002e8) + fake_uart4 = IsaFake(pio_addr=0x801fc0003f0) + + fake_ppc = IsaFake(pio_addr=0x801fc0003bc) + + fake_OROM = IsaFake(pio_addr=0x800000a0000, pio_size=0x60000) + + fake_pnp_addr = IsaFake(pio_addr=0x801fc000279) + fake_pnp_write = IsaFake(pio_addr=0x801fc000a79) + fake_pnp_read0 = IsaFake(pio_addr=0x801fc000203) + fake_pnp_read1 = IsaFake(pio_addr=0x801fc000243) + fake_pnp_read2 = IsaFake(pio_addr=0x801fc000283) + fake_pnp_read3 = IsaFake(pio_addr=0x801fc0002c3) + fake_pnp_read4 = IsaFake(pio_addr=0x801fc000303) + fake_pnp_read5 = IsaFake(pio_addr=0x801fc000343) + fake_pnp_read6 = IsaFake(pio_addr=0x801fc000383) + fake_pnp_read7 = IsaFake(pio_addr=0x801fc0003c3) + + fake_ata0 = IsaFake(pio_addr=0x801fc0001f0) + fake_ata1 = IsaFake(pio_addr=0x801fc000170) + + fb = BadDevice(pio_addr=0x801fc0003d0, devicename='FrameBuffer') + io = TsunamiIO(pio_addr=0x801fc000000) + uart = Uart8250(pio_addr=0x801fc0003f8) + console = AlphaConsole(pio_addr=0x80200000000, disk=Parent.simple_disk) + + # Attach I/O devices to specified bus object. Can't do this + # earlier, since the bus object itself is typically defined at the + # System level. + def attachIO(self, bus): + self.cchip.pio = bus.port + self.pchip.pio = bus.port + self.pciconfig.pio = bus.default + self.fake_sm_chip.pio = bus.port + self.fake_uart1.pio = bus.port + self.fake_uart2.pio = bus.port + self.fake_uart3.pio = bus.port + self.fake_uart4.pio = bus.port + self.fake_ppc.pio = bus.port + self.fake_OROM.pio = bus.port + self.fake_pnp_addr.pio = bus.port + self.fake_pnp_write.pio = bus.port + self.fake_pnp_read0.pio = bus.port + self.fake_pnp_read1.pio = bus.port + self.fake_pnp_read2.pio = bus.port + self.fake_pnp_read3.pio = bus.port + self.fake_pnp_read4.pio = bus.port + self.fake_pnp_read5.pio = bus.port + self.fake_pnp_read6.pio = bus.port + self.fake_pnp_read7.pio = bus.port + self.fake_ata0.pio = bus.port + self.fake_ata1.pio = bus.port + self.fb.pio = bus.port + self.io.pio = bus.port + self.uart.pio = bus.port + self.console.pio = bus.port diff --git a/src/sim/main.cc b/src/sim/main.cc index 3eb7fa95d..4ea8c4138 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -115,70 +115,11 @@ abortHandler(int sigtype) #endif } -/// Simulator executable name -char *myProgName = ""; - -/// Show brief help message. -void -showBriefHelp(ostream &out) -{ - char *prog = basename(myProgName); - - ccprintf(out, "Usage:\n"); - ccprintf(out, -"%s [-p <path>] [-i ] [-h] <config file>\n" -"\n" -" -p, --path <path> prepends <path> to PYTHONPATH instead of using\n" -" built-in zip archive. Useful when developing/debugging\n" -" changes to built-in Python libraries, as the new Python\n" -" can be tested without building a new m5 binary.\n\n" -" -i, --interactive forces entry into interactive mode after the supplied\n" -" script is executed (just like the -i option to the\n" -" Python interpreter).\n\n" -" -h Prints this help\n\n" -" <configfile> config file name which ends in .py. (Normally you can\n" -" run <configfile> --help to get help on that config files\n" -" parameters.\n\n", - prog); - -} - -const char *briefCopyright = -"Copyright (c) 2001-2006\n" -"The Regents of The University of Michigan\n" -"All Rights Reserved\n"; - -/// Print welcome message. -void -sayHello(ostream &out) -{ - extern const char *compileDate; // from date.cc - - ccprintf(out, "M5 Simulator System\n"); - // display copyright - ccprintf(out, "%s\n", briefCopyright); - ccprintf(out, "M5 compiled %d\n", compileDate); - ccprintf(out, "M5 started %s\n", Time::start); - - char *host = getenv("HOSTNAME"); - if (!host) - host = getenv("HOST"); - - if (host) - ccprintf(out, "M5 executing on %s\n", host); -} - - extern "C" { void init_cc_main(); } int main(int argc, char **argv) { - // Saze off program name - myProgName = argv[0]; - - sayHello(cerr); - signal(SIGFPE, SIG_IGN); // may occur on misspeculated paths signal(SIGTRAP, SIG_IGN); signal(SIGUSR1, dumpStatsHandler); // dump intermediate stats @@ -189,72 +130,18 @@ main(int argc, char **argv) Py_SetProgramName(argv[0]); // default path to m5 python code is the currently executing - // file... Python ZipImporter will find embedded zip archive - char *pythonpath = argv[0]; - - bool interactive = false; - bool show_help = false; - bool getopt_done = false; - int opt_index = 0; - - static struct option long_options[] = { - {"python", 1, 0, 'p'}, - {"interactive", 0, 0, 'i'}, - {"help", 0, 0, 'h'}, - {0,0,0,0} - }; - - do { - switch (getopt_long(argc, argv, "+p:ih", long_options, &opt_index)) { - // -p <path> prepends <path> to PYTHONPATH instead of - // using built-in zip archive. Useful when - // developing/debugging changes to built-in Python - // libraries, as the new Python can be tested without - // building a new m5 binary. - case 'p': - pythonpath = optarg; - break; - - // -i forces entry into interactive mode after the - // supplied script is executed (just like the -i option to - // the Python interpreter). - case 'i': - interactive = true; - break; - - case 'h': - show_help = true; - break; - case -1: - getopt_done = true; - break; - - default: - fatal("Unrecognized option %c\n", optopt); - } - } while (!getopt_done); - - if (show_help) { - showBriefHelp(cerr); - exit(1); - } - - // Fix up argc & argv to hide arguments we just processed. - // getopt() sets optind to the index of the first non-processed - // argv element. - argc -= optind; - argv += optind; - - // Set up PYTHONPATH to make sure the m5 module is found - string newpath(pythonpath); + // file... Python ZipImporter will find embedded zip archive. + // The M5_ARCHIVE environment variable can be used to override this. + char *m5_archive = getenv("M5_ARCHIVE"); + string pythonpath = m5_archive ? m5_archive : argv[0]; char *oldpath = getenv("PYTHONPATH"); if (oldpath != NULL) { - newpath += ":"; - newpath += oldpath; + pythonpath += ":"; + pythonpath += oldpath; } - if (setenv("PYTHONPATH", newpath.c_str(), true) == -1) + if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1) fatal("setenv: %s\n", strerror(errno)); // initialize embedded Python interpreter @@ -264,37 +151,8 @@ main(int argc, char **argv) // initialize SWIG 'cc_main' module init_cc_main(); - if (argc > 0) { - // extra arg(s): first is script file, remaining ones are args - // to script file - char *filename = argv[0]; - FILE *fp = fopen(filename, "r"); - if (!fp) { - fatal("cannot open file '%s'\n", filename); - } - - PyRun_AnyFile(fp, filename); - } else { - // no script file argument... force interactive prompt - interactive = true; - } - - if (interactive) { - // The following code to import readline was copied from Python - // 2.4.3's Modules/main.c. - // Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 - // Python Software Foundation; All Rights Reserved - // We should only enable this if we're actually using an - // interactive prompt. - PyObject *v; - v = PyImport_ImportModule("readline"); - if (v == NULL) - PyErr_Clear(); - else - Py_DECREF(v); - - PyRun_InteractiveLoop(stdin, "stdin"); - } + PyRun_SimpleString("import m5"); + PyRun_SimpleString("m5.main()"); // clean up Python intepreter. Py_Finalize(); @@ -357,7 +215,7 @@ loadIniFile(PyObject *_resolveFunc) configStream = simout.find("config.out"); // The configuration database is now complete; start processing it. - inifile.load("config.ini"); + inifile.load(simout.resolve("config.ini")); // Initialize statistics database Stats::InitSimStats(); @@ -523,34 +381,34 @@ simulate(Tick num_cycles = -1) } Event * -createCountedQuiesce() +createCountedDrain() { - return new CountedQuiesceEvent(); + return new CountedDrainEvent(); } void -cleanupCountedQuiesce(Event *counted_quiesce) +cleanupCountedDrain(Event *counted_drain) { - CountedQuiesceEvent *event = - dynamic_cast<CountedQuiesceEvent *>(counted_quiesce); + CountedDrainEvent *event = + dynamic_cast<CountedDrainEvent *>(counted_drain); if (event == NULL) { - fatal("Called cleanupCountedQuiesce() on an event that was not " - "a CountedQuiesceEvent."); + fatal("Called cleanupCountedDrain() on an event that was not " + "a CountedDrainEvent."); } assert(event->getCount() == 0); delete event; } void -serializeAll() +serializeAll(const std::string &cpt_dir) { - Serializable::serializeAll(); + Serializable::serializeAll(cpt_dir); } void -unserializeAll() +unserializeAll(const std::string &cpt_dir) { - Serializable::unserializeAll(); + Serializable::unserializeAll(cpt_dir); } /** diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index b2854e491..fcf0b957a 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -37,7 +37,6 @@ #include "sim/pseudo_inst.hh" #include "arch/vtophys.hh" #include "cpu/base.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/thread_context.hh" #include "cpu/quiesce_event.hh" #include "kern/kernel_stats.hh" @@ -52,8 +51,6 @@ using namespace std; -extern Sampler *SampCPU; - using namespace Stats; using namespace TheISA; @@ -209,6 +206,7 @@ namespace AlphaPseudo { if (!doCheckpointInsts) return; + exitSimLoop("checkpoint"); } uint64_t @@ -280,7 +278,6 @@ namespace AlphaPseudo void switchcpu(ThreadContext *tc) { - if (SampCPU) - SampCPU->switchCPUs(); + exitSimLoop("switchcpu"); } } diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 7450d7b7e..6a1d084b7 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -231,8 +231,9 @@ Globals::unserialize(Checkpoint *cp) } void -Serializable::serializeAll() +Serializable::serializeAll(const std::string &cpt_dir) { + setCheckpointDir(cpt_dir); string dir = Checkpoint::dir(); if (mkdir(dir.c_str(), 0775) == -1 && errno != EEXIST) fatal("couldn't mkdir %s\n", dir); @@ -247,8 +248,9 @@ Serializable::serializeAll() } void -Serializable::unserializeAll() +Serializable::unserializeAll(const std::string &cpt_dir) { + setCheckpointDir(cpt_dir); string dir = Checkpoint::dir(); string cpt_file = dir + Checkpoint::baseFilename; string section = ""; @@ -289,9 +291,9 @@ Checkpoint::dir() } void -debug_serialize() +debug_serialize(const std::string &cpt_dir) { - Serializable::serializeAll(); + Serializable::serializeAll(cpt_dir); } diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index a80dc99e4..880fb0785 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -126,8 +126,8 @@ class Serializable static int ckptCount; static int ckptMaxCount; static int ckptPrevCount; - static void serializeAll(); - static void unserializeAll(); + static void serializeAll(const std::string &cpt_dir); + static void unserializeAll(const std::string &cpt_dir); static void unserializeGlobals(Checkpoint *cp); }; @@ -206,7 +206,7 @@ SerializableClass the##OBJ_CLASS##Class(CLASS_NAME, \ OBJ_CLASS::createForUnserialize); void -setCheckpointName(const std::string &name); +setCheckpointDir(const std::string &name); class Checkpoint { diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc index 97f7ae03c..d9e8bdeaa 100644 --- a/src/sim/sim_events.cc +++ b/src/sim/sim_events.cc @@ -79,10 +79,10 @@ exitSimLoop(const std::string &message, int exit_code) } void -CountedQuiesceEvent::process() +CountedDrainEvent::process() { if (--count == 0) { - exitSimLoop("Finished quiesce"); + exitSimLoop("Finished drain"); } } diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 50368f258..3c4a9dd05 100644 --- a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -67,13 +67,13 @@ class SimLoopExitEvent : public Event virtual const char *description(); }; -class CountedQuiesceEvent : public SimLoopExitEvent +class CountedDrainEvent : public SimLoopExitEvent { private: - // Count down to quiescing + // Count of how many objects have not yet drained int count; public: - CountedQuiesceEvent() + CountedDrainEvent() : count(0) { } void process(); diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 551555b25..d12b06b7a 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -37,7 +37,6 @@ #include "base/misc.hh" #include "base/trace.hh" #include "base/stats/events.hh" -#include "base/serializer.hh" #include "sim/host.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -73,7 +72,7 @@ SimObject::SimObject(Params *p) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); - state = Atomic; + state = Running; } // @@ -89,7 +88,7 @@ SimObject::SimObject(const string &_name) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); - state = Atomic; + state = Running; } void @@ -270,38 +269,23 @@ SimObject::recordEvent(const std::string &stat) Stats::recordEvent(stat); } -bool -SimObject::quiesce(Event *quiesce_event) +unsigned int +SimObject::drain(Event *drain_event) { - if (state != QuiescedAtomic && state != Atomic) { - panic("Must implement your own quiesce function if it is to be used " - "in timing mode!"); - } - state = QuiescedAtomic; - return false; + state = Drained; + return 0; } void SimObject::resume() { - if (state == QuiescedAtomic) { - state = Atomic; - } else if (state == QuiescedTiming) { - state = Timing; - } + state = Running; } void SimObject::setMemoryMode(State new_mode) { - assert(new_mode == Timing || new_mode == Atomic); - if (state == QuiescedAtomic && new_mode == Timing) { - state = QuiescedTiming; - } else if (state == QuiescedTiming && new_mode == Atomic) { - state = QuiescedAtomic; - } else { - state = new_mode; - } + panic("setMemoryMode() should only be called on systems"); } void diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh index e0b21782f..38f2bdd23 100644 --- a/src/sim/sim_object.hh +++ b/src/sim/sim_object.hh @@ -60,16 +60,15 @@ class SimObject : public Serializable, protected StartupCallback }; enum State { - Atomic, - Timing, - Quiescing, - QuiescedAtomic, - QuiescedTiming + Running, + Draining, + Drained }; + private: + State state; protected: Params *_params; - State state; void changeState(State new_state) { state = new_state; } @@ -116,8 +115,10 @@ class SimObject : public Serializable, protected StartupCallback // Methods to drain objects in order to take checkpoints // Or switch from timing -> atomic memory model - // Quiesce returns true if the SimObject cannot quiesce immediately. - virtual bool quiesce(Event *quiesce_event); + // Drain returns 0 if the simobject can drain immediately or + // the number of times the drain_event's process function will be called + // before the object will be done draining. Normally this should be 1 + virtual unsigned int drain(Event *drain_event); virtual void resume(); virtual void setMemoryMode(State new_mode); virtual void switchOut(); diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index a3990e2fd..a3ff006ef 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -27,7 +27,6 @@ * * Authors: Steve Reinhardt * Kevin Lim - * Korey Sewell */ #ifndef __SIM_SYSCALL_EMUL_HH__ diff --git a/src/sim/system.cc b/src/sim/system.cc index 89e7b8542..ad70b9b03 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -63,7 +63,7 @@ System::System(Params *p) #else page_ptr(0), #endif - _params(p) + memoryMode(p->mem_mode), _params(p) { // add self to global system list systemList.push_back(this); @@ -143,6 +143,14 @@ int rgdb_wait = -1; #endif // FULL_SYSTEM + +void +System::setMemoryMode(MemoryMode mode) +{ + assert(getState() == Drained); + memoryMode = mode; +} + int System::registerThreadContext(ThreadContext *tc, int id) { @@ -249,6 +257,9 @@ printSystems() System::printSystems(); } +const char *System::MemoryModeStrings[3] = {"invalid", "atomic", + "timing"}; + #if FULL_SYSTEM // In full system mode, only derived classes (e.g. AlphaLinuxSystem) @@ -261,12 +272,15 @@ DEFINE_SIM_OBJECT_CLASS_NAME("System", System) BEGIN_DECLARE_SIM_OBJECT_PARAMS(System) SimObjectParam<PhysicalMemory *> physmem; + SimpleEnumParam<System::MemoryMode> mem_mode; END_DECLARE_SIM_OBJECT_PARAMS(System) BEGIN_INIT_SIM_OBJECT_PARAMS(System) - INIT_PARAM(physmem, "physical memory") + INIT_PARAM(physmem, "physical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings) END_INIT_SIM_OBJECT_PARAMS(System) @@ -275,6 +289,7 @@ CREATE_SIM_OBJECT(System) System::Params *p = new System::Params; p->name = getInstanceName(); p->physmem = physmem; + p->mem_mode = mem_mode; return new System(p); } diff --git a/src/sim/system.hh b/src/sim/system.hh index 059dc92dc..a1b53c2eb 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -61,6 +61,23 @@ class RemoteGDB; class System : public SimObject { public: + enum MemoryMode { + Invalid=0, + Atomic, + Timing + }; + + static const char *MemoryModeStrings[3]; + + + MemoryMode getMemoryMode() { assert(memoryMode); return memoryMode; } + + /** Change the memory mode of the system. This should only be called by the + * python!! + * @param mode Mode to change to (atomic/timing) + */ + void setMemoryMode(MemoryMode mode); + PhysicalMemory *physmem; PCEventQueue pcEventQueue; @@ -108,6 +125,8 @@ class System : public SimObject protected: + MemoryMode memoryMode; + #if FULL_SYSTEM /** * Fix up an address used to match PCs for hooking simulator @@ -153,6 +172,7 @@ class System : public SimObject { std::string name; PhysicalMemory *physmem; + MemoryMode mem_mode; #if FULL_SYSTEM Tick boot_cpu_frequency; diff --git a/tests/SConscript b/tests/SConscript new file mode 100644 index 000000000..5eadce6d4 --- /dev/null +++ b/tests/SConscript @@ -0,0 +1,244 @@ +# -*- mode:python -*- + +# Copyright (c) 2004-2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import sys +import glob +from SCons.Script.SConscript import SConsEnvironment + +Import('env') + +env['DIFFOUT'] = File('diff-out') + +# Dict that accumulates lists of tests by category (quick, medium, long) +env.Tests = {} + +def contents(node): + return file(str(node)).read() + +def check_test(target, source, env): + """Check output from running test. + + Targets are as follows: + target[0] : outdiff + target[1] : statsdiff + target[2] : status + + """ + # make sure target files are all gone + for t in target: + if os.path.exists(t.abspath): + Execute(Delete(t.abspath)) + # Run diff on output & ref directories to find differences. + # Exclude m5stats.txt since we will use diff-out on that. + Execute(env.subst('diff -ubr ${SOURCES[0].dir} ${SOURCES[1].dir} ' + + '-I "^command line:" ' + # for stdout file + '-I "^M5 compiled on" ' + # for stderr file + '-I "^M5 simulation started" ' + # for stderr file + '-I "^Simulation complete at" ' + # for stderr file + '-I "^Listening for" ' + # for stderr file + '--exclude=m5stats.txt --exclude=SCCS ' + + '--exclude=${TARGETS[0].file} ' + + '> ${TARGETS[0]}', target=target, source=source), None) + print "===== Output differences =====" + print contents(target[0]) + # Run diff-out on m5stats.txt file + status = Execute(env.subst('$DIFFOUT $SOURCES > ${TARGETS[1]}', + target=target, source=source), + strfunction=None) + print "===== Statistics differences =====" + print contents(target[1]) + # Generate status file contents based on exit status of diff-out + if status == 0: + status_str = "passed." + else: + status_str = "FAILED!" + f = file(str(target[2]), 'w') + print >>f, env.subst('${TARGETS[2].dir}', target=target, source=source), \ + status_str + f.close() + # done + return 0 + +def check_test_string(target, source, env): + return env.subst("Comparing outputs in ${TARGETS[0].dir}.", + target=target, source=source) + +testAction = env.Action(check_test, check_test_string) + +def print_test(target, source, env): + print '***** ' + contents(source[0]) + return 0 + +printAction = env.Action(print_test, strfunction = None) + +def update_test(target, source, env): + """Update reference test outputs. + + Target is phony. First two sources are the ref & new m5stats.txt + files, respectively. We actually copy everything in the + respective directories except the status & diff output files. + + """ + dest_dir = str(source[0].get_dir()) + src_dir = str(source[1].get_dir()) + dest_files = os.listdir(dest_dir) + src_files = os.listdir(src_dir) + # Exclude status & diff outputs + for f in ('outdiff', 'statsdiff', 'status'): + if f in src_files: + src_files.remove(f) + for f in src_files: + if f in dest_files: + print " Replacing file", f + dest_files.remove(f) + else: + print " Creating new file", f + copyAction = Copy(os.path.join(dest_dir, f), os.path.join(src_dir, f)) + copyAction.strfunction = None + Execute(copyAction) + # warn about any files in dest not overwritten (other than SCCS dir) + if 'SCCS' in dest_files: + dest_files.remove('SCCS') + if dest_files: + print "Warning: file(s) in", dest_dir, "not updated:", + print ', '.join(dest_files) + return 0 + +def update_test_string(target, source, env): + return env.subst("Updating ${SOURCES[0].dir} from ${SOURCES[1].dir}", + target=target, source=source) + +updateAction = env.Action(update_test, update_test_string) + +def test_builder(env, category, cpu_list=[], os_list=[], refdir='ref', timeout=15): + """Define a test. + + Args: + category -- string describing test category (e.g., 'quick') + cpu_list -- list of CPUs to runs this test on (blank means all compiled CPUs) + os_list -- list of OSs to run this test on + refdir -- subdirectory containing reference output (default 'ref') + timeout -- test timeout in minutes (only enforced on pool) + + """ + + default_refdir = False + if refdir == 'ref': + default_refdir = True + if len(cpu_list) == 0: + cpu_list = env['CPU_MODELS'] +# if len(os_list) == 0: +# raise RuntimeError, "No OS specified" +# else: +# for test_os in os_list: +# build_cpu_test(env, category, test_os, cpu_list, refdir, timeout) + # Loop through CPU models and generate proper options, ref directories for each + for cpu in cpu_list: + test_os = '' + if cpu == "AtomicSimpleCPU": + cpu_option = ('','atomic/') + elif cpu == "TimingSimpleCPU": + cpu_option = ('--timing','timing/') + elif cpu == "O3CPU": + cpu_option = ('--detailed','detailed/') + else: + raise TypeError, "Unknown CPU model specified" + + if default_refdir: + # Reference stats located in ref/arch/os/cpu or ref/arch/cpu if no OS specified + test_refdir = os.path.join(refdir, env['TARGET_ISA']) + if test_os != '': + test_refdir = os.path.join(test_refdir, test_os) + cpu_refdir = os.path.join(test_refdir, cpu_option[1]) + + ref_stats = os.path.join(cpu_refdir, 'm5stats.txt') + + # base command for running test + base_cmd = '${SOURCES[0]} -d $TARGET.dir ${SOURCES[1]}' + base_cmd = base_cmd + ' ' + cpu_option[0] + # stdout and stderr files + cmd_stdout = '${TARGETS[0]}' + cmd_stderr = '${TARGETS[1]}' + + stdout_string = cpu_option[1] + 'stdout' + stderr_string = cpu_option[1] + 'stderr' + m5stats_string = cpu_option[1] + 'm5stats.txt' + outdiff_string = cpu_option[1] + 'outdiff' + statsdiff_string = cpu_option[1] + 'statsdiff' + status_string = cpu_option[1] + 'status' + + # Prefix test run with batch job submission command if appropriate. + # Output redirection is also different for batch runs. + # Batch command also supports timeout arg (in seconds, not minutes). + if env['BATCH']: + cmd = [env['BATCH_CMD'], '-t', str(timeout * 60), + '-o', cmd_stdout, '-e', cmd_stderr, base_cmd] + else: + cmd = [base_cmd, '>', cmd_stdout, '2>', cmd_stderr] + + env.Command([stdout_string, stderr_string, m5stats_string], [env.M5Binary, 'run.py'], + ' '.join(cmd)) + + # order of targets is important... see check_test + env.Command([outdiff_string, statsdiff_string, status_string], + [ref_stats, m5stats_string], + testAction) + + # phony target to echo status + if env['update_ref']: + p = env.Command(cpu_option[1] + '_update', [ref_stats, m5stats_string, status_string], + updateAction) + else: + p = env.Command(cpu_option[1] + '_print', [status_string], printAction) + env.AlwaysBuild(p) + + env.Tests.setdefault(category, []) + env.Tests[category] += p + +# Make test_builder a "wrapper" function. See SCons wiki page at +# http://www.scons.org/cgi-bin/wiki/WrapperFunctions. +SConsEnvironment.Test = test_builder + +cwd = os.getcwd() +os.chdir(str(Dir('.').srcdir)) +scripts = glob.glob('*/SConscript') +os.chdir(cwd) + +for s in scripts: + SConscript(s, exports = 'env', duplicate = False) + +# Set up phony commands for various test categories +allTests = [] +for (key, val) in env.Tests.iteritems(): + env.Command(key, val, env.NoAction) + allTests += val + +# The 'all' target is redundant since just specifying the test +# directory name (e.g., ALPHA_SE/test/opt) has the same effect. +env.Command('all', allTests, env.NoAction) diff --git a/tests/diff-out b/tests/diff-out new file mode 100755 index 000000000..5ebe97dd7 --- /dev/null +++ b/tests/diff-out @@ -0,0 +1,409 @@ +#!/usr/bin/perl +# Copyright (c) 2001-2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Steve Reinhardt + +# +# This script diffs two SimpleScalar statistics output files. +# + +use Getopt::Std; + +# +# -t thresh sets threshold for ignoring differences (in %) +# -p sorts differences by % chg (default is alphabetic) +# -f ignores fetch-loss statistics +# -d ignores all distributions +# + +getopts('dfn:pt:h'); + +if ($#ARGV < 1) +{ + print "\nError: need two file arguments (<reference> <new>).\n"; + print " Options: -d = Ignore distributions\n"; + print " -f = Ignore fetch-loss stats\n"; + print " -p = Sort errors by percentage\n"; + print " -h = Diff header info separately from stats\n"; + print " -n <num> = Print top <num> errors (default 20)\n"; + print " -t <num> = Error threshold in percent (default 1)\n\n"; + die -1; +} + +open(REF, "<$ARGV[0]") or die "Error: can't open $ARGV[0].\n"; +open(NEW, "<$ARGV[1]") or die "Error: can't open $ARGV[1].\n"; + + +# +# Things that really should be adjustable via the command line +# + +# Ignorable error (in percent) +$err_thresh = ($opt_t) ? $opt_t : 0; + +# Number of stats to print before omitting +$omit_count = ($opt_n) ? $opt_n : 20; + + +# +# First copy everything up to the simulation statistics to a pair of +# temporary files, stripping out date-related items, and do a plain +# diff. Any differences in the arguments are not necessarily an issue; +# any differences in the program output should be caught by the EIO +# mechanism if an EIO file is used. +# + +# copy_header takes input filehandle and output filename + +sub copy_header +{ + my ($inhandle, $outname) = @_; + + open(OUTPUT, ">$outname") or die "Error: can't open $outname.\n"; + + while (<$inhandle>) + { + # strip out lines that can vary + next if /^(command line:|M5 compiled on |M5 simulation started |M5 executing on )/; + last if /Begin Simulation Statistics/; + print OUTPUT; + } + close OUTPUT; +} + +if ($opt_h) { + + # Diff header separately from stats + + $refheader = "/tmp/smt-test.refheader.$$"; + $newheader = "/tmp/smt-test.newheader.$$"; + + copy_header(\*REF, $refheader); + copy_header(\*NEW, $newheader); + + print "\n===== Header and program output differences =====\n\n"; + + print `diff $refheader $newheader`; + + print "\n===== Statistics differences =====\n\n"; +} + +# +# Now parse statistics +# + +# +# This function takes an open filehandle and returns a reference to +# a hash containing all the statistics variables and their values. +# +sub parse_file +{ + $stathandle = shift; + + $in_dist = undef; + $hashref = { }; # initialize hash for values + + while (<$stathandle>) + { + next if /^\s*$/; # skip blank lines + next if /^\*\*Ignore/; # temporary, to make totaling scripts easy for ISCA 03 + last if /End Simulation Statistics/; + + s/ *#.*//; # strip comments + + if (/^Memory usage: (\d+) KBytes/) { + $stat = 'memory usage'; + $value = $1; + } + elsif ($in_dist) { + if ($in_dist =~ /^fetch_loss_counters/) { + if (/^fetch_loss_counters_\d+\.end/) { + # end line of distribution: clear $in_dist flag + $in_dist = undef; + next; + } + else { + next if $opt_f; + + ($stat, $value) = /^(\S+)\s+(.*)/; + } + } + else { + if (/(.*)\.end_dist/) { + # end line of distribution: clear $in_dist flag + $in_dist = undef; + next; + } + if ($opt_d) { + next; # bail out if we are ignoring dists... + } + elsif (/(.*)\.(min|max)_value/) { + # treat these like normal stats + ($stat, $value) = /^(\S+)\s+(.*)/; + } + else { + # this is ugly because labels in the distribution + # buckets don't start in column 0 and may include + # embedded spaces + ($stat, $value) = + /^\s*(\S+(?:.*\S)?)\s+(\d+)\s+\d+\.\d+%/; + $stat = $in_dist . '::' . $stat; + } + } + } + else { + if (/(.*)\.start_dist/) { + # start line of distribution: set $in_dist flag + # and save distribution name for future reference + $in_dist = $1; + $stat = $1; + $value = 0; + } + elsif (/^(fetch_loss_counters_\d+)\.start/) { + # treat fetch loss counters like distribution, sort of + $in_dist = $1; + $stat = $1; + $value = 0; + } + else { + ($stat, $value) = /^(\S+)\s+(.*)/; + } + } + + $$hashref{$stat} = $value; + } + + close($stathandle); + return $hashref; +} + + +# +# pct_diff($old, $new) returns percent difference from $old to $new. +# +sub pct_diff +{ + my ($old, $new) = @_; + return ($old == 0) ? (($new == 0) ? 0 : 9999) : 100 * ($new - $old) / $old; +} + + +# +# Statistics to ignore: these relate to simulator performance, not +# correctness, so don't fail on changes here. +# +%ignore = ( + 'host_seconds' => 1, + 'host_tick_rate' => 1, + 'host_inst_rate' => 1, + 'host_mem_usage' => 1 +); + +# +# List of key statistics (always displayed) +# ==> list stats here WITHOUT trailing thread ID +# +@key_stat_list = ( + 'COM:IPC', + 'ISSUE:MSIPC', + 'COM:count', + 'host_inst_rate', + 'sim_insts', + 'sim_ticks', + 'host_mem_usage' +); + +$key_stat_pattern = join('|', @key_stat_list); + +# initialize first statistics from each file + +$max_err_mag = 0; + +$refhash = parse_file(\*REF); +$newhash = parse_file(\*NEW); + +# The string sim-smt prints on a divide by zero +$divbyzero = '<err: divide by zero>'; + +foreach $stat (sort keys %$refhash) +{ + $refvalue = $$refhash{$stat}; + $newvalue = $$newhash{$stat}; + + if (!defined($newvalue)) { + # stat missing from new file + push @missing_stats, $stat; + next; + } + + if ($stat =~ /($key_stat_pattern)/o) { + # key statistics: always record & display changes in these + push @key_stats, [$stat, $refvalue, $newvalue]; + } + + if ($ignore{$stat} or $refvalue eq $newvalue) { + # stat is in "ignore" list, or hasn't changed + } + else { + if ($refvalue eq $divbyzero || $newvalue eq $divbyzero) { + # one or the other was a divide by zero: + # no point in trying to quantify error + print "$stat: $refvalue --> $newvalue\n"; + } + else { + $reldiff = pct_diff($refvalue, $newvalue); + $diffmag = abs($reldiff); + + if ($diffmag > $err_thresh) { + push @errs, + [$stat, $refvalue, $newvalue, $reldiff]; + } + + if ($diffmag > $max_err_mag) { + $max_err_mag = $diffmag; + } + } + } + + # remove from new hash so we can detect added stats + delete $$newhash{$stat}; +} + + +# +# All done. Print comparison summary. +# + +printf("Maximum error magnitude: %+f%%\n\n", $max_err_mag); + +printf(" %-30s %10s %10s %10s %7s\n", ' ', 'Reference', 'New Value', 'Abs Diff', 'Pct Chg'); + +printf("Key statistics:\n\n"); + +foreach $key_stat (@key_stats) +{ + ($statname, $refvalue, $newvalue, $reldiff) = @$key_stat; + + # deduce format from reference value + $pointpos = rindex($refvalue, '.'); + $digits = ($pointpos < 0) ? 0 :(length($refvalue) - $pointpos - 1); + $fmt = "%10.${digits}f"; + + # print differing values with absolute and relative error + printf(" %-30s $fmt $fmt $fmt %+7.2f%%\n", + $statname, $refvalue, $newvalue, + $newvalue - $refvalue, pct_diff($refvalue, $newvalue)); +} + +printf("\nLargest $omit_count relative errors (> %d%%):\n\n", $err_thresh); + +$num_errs = 0; + +if ($opt_p) +{ + # sort differences by percent change + @errs = sort { abs($$b[3]) <=> abs($$a[3]) } @errs; +} + +foreach $err (@errs) +{ + ($statname, $refvalue, $newvalue, $reldiff) = @$err; + + # deduce format from reference value + $pointpos1 = rindex($refvalue, '.'); + $digits1 = ($pointpos1 < 0) ? 0 :(length($refvalue) - $pointpos1 - 1); + $pointpos2 = rindex($newvalue, '.'); + $digits2 = ($pointpos2 < 0) ? 0 :(length($newvalue) - $pointpos2 - 1); + $digits = ($digits1 > $digits2) ? $digits1 : $digits2; + $fmt = "%10.${digits}f"; + + # print differing values with absolute and relative error + printf(" %-30s $fmt $fmt $fmt %+7.2f%%\n", + $statname, $refvalue, $newvalue, $newvalue - $refvalue, $reldiff); + + # only print top N errors + if (++$num_errs >= $omit_count) + { + print "[... additional errors omitted ...]\n"; + last; + } +} + +# +# Report missing stats, but first filter out distribution buckets: +# these are mostly noise + +@missing_stats = grep { !/::(\d+|overflows)?$/ } @missing_stats; + +# get count +$missing_stats = scalar(@missing_stats); + +if ($missing_stats) +{ + print "\nMissing $missing_stats reference statistics:\n\n"; + foreach $stat (@missing_stats) + { +# print "\t$stat\n"; + printf " %-50s ", $stat; + print "$$refhash{$stat}\n"; + } +} + +# +# Any stats left in newhash are added since the reference file +# + +@added_stats = keys %$newhash; + +# first filter out distribution buckets: mostly noise + +@added_stats = grep { !/::(\d+|overflows)?$/ } @added_stats; + +# get count +$added_stats = scalar(@added_stats); + +if ($added_stats) +{ + print "\nFound $added_stats new statistics:\n\n"; + foreach $stat (sort @added_stats) + { +# print "\t$stat\n"; + printf " %-50s ", $stat; + print "$$newhash{$stat}\n"; + } +} + +cleanup(); +# Exit code is 0 if no stats error, 1 otherwise +$status = ($max_err_mag == 0.0) ? 0 : 1; +exit $status; + +sub cleanup +{ + unlink($refheader) if ($refheader); + unlink($newheader) if ($newheader); +} diff --git a/tests/test1/ref/alpha/atomic/config.ini b/tests/test1/ref/alpha/atomic/config.ini new file mode 100644 index 000000000..9961fe389 --- /dev/null +++ b/tests/test1/ref/alpha/atomic/config.ini @@ -0,0 +1,95 @@ +[root] +type=Root +children=system +checkpoint= +clock=1000000000000 +max_tick=0 +output_file=cout +progress_interval=0 + +[debug] +break_cycles= + +[exetrace] +intel_format=false +pc_symbol=true +print_cpseq=false +print_cycle=true +print_data=true +print_effaddr=true +print_fetchseq=false +print_iregs=false +print_opclass=true +print_thread=true +speculative=true +trace_system=client + +[serialize] +count=10 +cycle=0 +dir=cpt.%012d +period=0 + +[stats] +descriptions=true +dump_cycle=0 +dump_period=0 +dump_reset=false +ignore_events= +mysql_db= +mysql_host= +mysql_password= +mysql_user= +project_name=test +simulation_name=test +simulation_sample=0 +text_compat=true +text_file=m5stats.txt + +[system] +type=System +children=cpu0 physmem workload +mem_mode=atomic +physmem=system.physmem + +[system.cpu0] +type=AtomicSimpleCPU +children=mem +clock=1 +defer_registration=false +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=500000 +max_loads_all_threads=0 +max_loads_any_thread=0 +mem=system.cpu0.mem +simulate_stalls=false +system=system +width=1 +workload=system.workload + +[system.cpu0.mem] +type=Bus +bus_id=0 + +[system.physmem] +type=PhysicalMemory +file= +latency=1 + +[system.workload] +type=EioProcess +chkpt= +file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz +output=cout +system=system + +[trace] +bufsize=0 +dump_on_exit=false +file=cout +flags= +ignore= +start=0 + diff --git a/tests/test1/ref/alpha/atomic/config.out b/tests/test1/ref/alpha/atomic/config.out new file mode 100644 index 000000000..a9c04ceff --- /dev/null +++ b/tests/test1/ref/alpha/atomic/config.out @@ -0,0 +1,90 @@ +[root] +type=Root +clock=1000000000000 +max_tick=0 +progress_interval=0 +output_file=cout + +[system.physmem] +type=PhysicalMemory +file= +// range not specified +latency=1 + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.workload] +type=EioProcess +file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz +chkpt= +output=cout +system=system + +[system.cpu0.mem] +type=Bus +bus_id=0 + +[system.cpu0] +type=AtomicSimpleCPU +max_insts_any_thread=500000 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +mem=system.cpu0.mem +system=system +workload=system.workload +clock=1 +defer_registration=false +width=1 +function_trace=false +function_trace_start=0 +simulate_stalls=false + +[trace] +flags= +start=0 +bufsize=0 +file=cout +dump_on_exit=false +ignore= + +[stats] +descriptions=true +project_name=test +simulation_name=test +simulation_sample=0 +text_file=m5stats.txt +text_compat=true +mysql_db= +mysql_user= +mysql_password= +mysql_host= +events_start=-1 +dump_reset=false +dump_cycle=0 +dump_period=0 +ignore_events= + +[random] +seed=1 + +[exetrace] +speculative=true +print_cycle=true +print_opclass=true +print_thread=true +print_effaddr=true +print_data=true +print_iregs=false +print_fetchseq=false +print_cpseq=false +pc_symbol=true +intel_format=false +trace_system=client + +[debug] +break_cycles= + diff --git a/tests/test1/ref/alpha/atomic/m5stats.txt b/tests/test1/ref/alpha/atomic/m5stats.txt new file mode 100644 index 000000000..09e94d639 --- /dev/null +++ b/tests/test1/ref/alpha/atomic/m5stats.txt @@ -0,0 +1,18 @@ + +---------- Begin Simulation Statistics ---------- +host_inst_rate 1301768 # Simulator instruction rate (inst/s) +host_mem_usage 147756 # Number of bytes of host memory used +host_seconds 0.38 # Real time elapsed on the host +host_tick_rate 1300060 # Simulator tick rate (ticks/s) +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 500000 # Number of instructions simulated +sim_seconds 0.000000 # Number of seconds simulated +sim_ticks 499999 # Number of ticks simulated +system.cpu0.idle_fraction 0 # Percentage of idle cycles +system.cpu0.not_idle_fraction 1 # Percentage of non-idle cycles +system.cpu0.numCycles 500000 # number of cpu cycles simulated +system.cpu0.num_insts 500000 # Number of instructions executed +system.cpu0.num_refs 182204 # Number of memory references +system.workload.PROG:num_syscalls 18 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/test1/ref/alpha/atomic/stderr b/tests/test1/ref/alpha/atomic/stderr new file mode 100644 index 000000000..4e444fa6b --- /dev/null +++ b/tests/test1/ref/alpha/atomic/stderr @@ -0,0 +1,3 @@ +warn: Entering event queue @ 0. Starting simulation... + +gzip: stdout: Broken pipe diff --git a/tests/test1/ref/alpha/atomic/stdout b/tests/test1/ref/alpha/atomic/stdout new file mode 100644 index 000000000..78ab05bdd --- /dev/null +++ b/tests/test1/ref/alpha/atomic/stdout @@ -0,0 +1,14 @@ +main dictionary has 1245 entries +49508 bytes wasted +>M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Jul 19 2006 15:49:01 +M5 started Wed Jul 19 15:49:10 2006 +M5 executing on zamp.eecs.umich.edu +Creating SE system +Exiting @ tick 499999 because a thread reached the max instruction count diff --git a/tests/test1/ref/alpha/detailed/config.ini b/tests/test1/ref/alpha/detailed/config.ini new file mode 100644 index 000000000..192833c8b --- /dev/null +++ b/tests/test1/ref/alpha/detailed/config.ini @@ -0,0 +1,298 @@ +[root] +type=Root +children=system +checkpoint= +clock=1000000000000 +max_tick=0 +output_file=cout +progress_interval=0 + +[debug] +break_cycles= + +[exetrace] +intel_format=false +pc_symbol=true +print_cpseq=false +print_cycle=true +print_data=true +print_effaddr=true +print_fetchseq=false +print_iregs=false +print_opclass=true +print_thread=true +speculative=true +trace_system=client + +[serialize] +count=10 +cycle=0 +dir=cpt.%012d +period=0 + +[stats] +descriptions=true +dump_cycle=0 +dump_period=0 +dump_reset=false +ignore_events= +mysql_db= +mysql_host= +mysql_password= +mysql_user= +project_name=test +simulation_name=test +simulation_sample=0 +text_compat=true +text_file=m5stats.txt + +[system] +type=System +children=cpu0 physmem workload +mem_mode=atomic +physmem=system.physmem + +[system.cpu0] +type=DerivO3CPU +children=checker fuPool mem +BTBEntries=4096 +BTBTagSize=16 +LFSTSize=1024 +LQEntries=32 +RASSize=16 +SQEntries=32 +SSITSize=1024 +activity=0 +backComSize=5 +checker=system.cpu0.checker +choiceCtrBits=2 +choicePredictorSize=8192 +clock=1 +commitToDecodeDelay=1 +commitToFetchDelay=1 +commitToIEWDelay=1 +commitToRenameDelay=1 +commitWidth=8 +decodeToFetchDelay=1 +decodeToRenameDelay=1 +decodeWidth=8 +defer_registration=false +dispatchWidth=8 +fetchToDecodeDelay=1 +fetchWidth=8 +forwardComSize=5 +fuPool=system.cpu0.fuPool +function_trace=false +function_trace_start=0 +globalCtrBits=2 +globalHistoryBits=13 +globalPredictorSize=8192 +iewToCommitDelay=1 +iewToDecodeDelay=1 +iewToFetchDelay=1 +iewToRenameDelay=1 +instShiftAmt=2 +issueToExecuteDelay=1 +issueWidth=8 +localCtrBits=2 +localHistoryBits=11 +localHistoryTableSize=2048 +localPredictorSize=2048 +max_insts_all_threads=0 +max_insts_any_thread=500000 +max_loads_all_threads=0 +max_loads_any_thread=0 +mem=system.cpu0.mem +numIQEntries=64 +numPhysFloatRegs=256 +numPhysIntRegs=256 +numROBEntries=192 +numThreads=1 +predType=tournament +renameToDecodeDelay=1 +renameToFetchDelay=1 +renameToIEWDelay=2 +renameToROBDelay=1 +renameWidth=8 +squashWidth=8 +system=system +wbDepth=1 +wbWidth=8 +workload=system.workload + +[system.cpu0.checker] +type=O3Checker +clock=1 +defer_registration=false +exitOnError=true +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +system=system +warnOnlyOnLoadError=false +workload=system.workload + +[system.cpu0.fuPool] +type=FUPool +children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7 +FUList=system.cpu0.fuPool.FUList0 system.cpu0.fuPool.FUList1 system.cpu0.fuPool.FUList2 system.cpu0.fuPool.FUList3 system.cpu0.fuPool.FUList4 system.cpu0.fuPool.FUList5 system.cpu0.fuPool.FUList6 system.cpu0.fuPool.FUList7 + +[system.cpu0.fuPool.FUList0] +type=FUDesc +children=opList0 +count=6 +opList=system.cpu0.fuPool.FUList0.opList0 + +[system.cpu0.fuPool.FUList0.opList0] +type=OpDesc +issueLat=1 +opClass=IntAlu +opLat=1 + +[system.cpu0.fuPool.FUList1] +type=FUDesc +children=opList0 opList1 +count=2 +opList=system.cpu0.fuPool.FUList1.opList0 system.cpu0.fuPool.FUList1.opList1 + +[system.cpu0.fuPool.FUList1.opList0] +type=OpDesc +issueLat=1 +opClass=IntMult +opLat=3 + +[system.cpu0.fuPool.FUList1.opList1] +type=OpDesc +issueLat=19 +opClass=IntDiv +opLat=20 + +[system.cpu0.fuPool.FUList2] +type=FUDesc +children=opList0 opList1 opList2 +count=4 +opList=system.cpu0.fuPool.FUList2.opList0 system.cpu0.fuPool.FUList2.opList1 system.cpu0.fuPool.FUList2.opList2 + +[system.cpu0.fuPool.FUList2.opList0] +type=OpDesc +issueLat=1 +opClass=FloatAdd +opLat=2 + +[system.cpu0.fuPool.FUList2.opList1] +type=OpDesc +issueLat=1 +opClass=FloatCmp +opLat=2 + +[system.cpu0.fuPool.FUList2.opList2] +type=OpDesc +issueLat=1 +opClass=FloatCvt +opLat=2 + +[system.cpu0.fuPool.FUList3] +type=FUDesc +children=opList0 opList1 opList2 +count=2 +opList=system.cpu0.fuPool.FUList3.opList0 system.cpu0.fuPool.FUList3.opList1 system.cpu0.fuPool.FUList3.opList2 + +[system.cpu0.fuPool.FUList3.opList0] +type=OpDesc +issueLat=1 +opClass=FloatMult +opLat=4 + +[system.cpu0.fuPool.FUList3.opList1] +type=OpDesc +issueLat=12 +opClass=FloatDiv +opLat=12 + +[system.cpu0.fuPool.FUList3.opList2] +type=OpDesc +issueLat=24 +opClass=FloatSqrt +opLat=24 + +[system.cpu0.fuPool.FUList4] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu0.fuPool.FUList4.opList0 + +[system.cpu0.fuPool.FUList4.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu0.fuPool.FUList5] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu0.fuPool.FUList5.opList0 + +[system.cpu0.fuPool.FUList5.opList0] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu0.fuPool.FUList6] +type=FUDesc +children=opList0 opList1 +count=4 +opList=system.cpu0.fuPool.FUList6.opList0 system.cpu0.fuPool.FUList6.opList1 + +[system.cpu0.fuPool.FUList6.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu0.fuPool.FUList6.opList1] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu0.fuPool.FUList7] +type=FUDesc +children=opList0 +count=1 +opList=system.cpu0.fuPool.FUList7.opList0 + +[system.cpu0.fuPool.FUList7.opList0] +type=OpDesc +issueLat=3 +opClass=IprAccess +opLat=3 + +[system.cpu0.mem] +type=Bus +bus_id=0 + +[system.physmem] +type=PhysicalMemory +file= +latency=1 + +[system.workload] +type=EioProcess +chkpt= +file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz +output=cout +system=system + +[trace] +bufsize=0 +dump_on_exit=false +file=cout +flags= +ignore= +start=0 + diff --git a/tests/test1/ref/alpha/detailed/config.out b/tests/test1/ref/alpha/detailed/config.out new file mode 100644 index 000000000..07c092a2b --- /dev/null +++ b/tests/test1/ref/alpha/detailed/config.out @@ -0,0 +1,293 @@ +[root] +type=Root +clock=1000000000000 +max_tick=0 +progress_interval=0 +output_file=cout + +[system.physmem] +type=PhysicalMemory +file= +// range not specified +latency=1 + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.workload] +type=EioProcess +file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz +chkpt= +output=cout +system=system + +[system.cpu0.mem] +type=Bus +bus_id=0 + +[system.cpu0.checker] +type=O3Checker +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +workload=system.workload +clock=1 +defer_registration=false +exitOnError=true +warnOnlyOnLoadError=false +function_trace=false +function_trace_start=0 + +[system.cpu0.fuPool.FUList0.opList0] +type=OpDesc +opClass=IntAlu +opLat=1 +issueLat=1 + +[system.cpu0.fuPool.FUList0] +type=FUDesc +opList=system.cpu0.fuPool.FUList0.opList0 +count=6 + +[system.cpu0.fuPool.FUList1.opList0] +type=OpDesc +opClass=IntMult +opLat=3 +issueLat=1 + +[system.cpu0.fuPool.FUList1.opList1] +type=OpDesc +opClass=IntDiv +opLat=20 +issueLat=19 + +[system.cpu0.fuPool.FUList1] +type=FUDesc +opList=system.cpu0.fuPool.FUList1.opList0 system.cpu0.fuPool.FUList1.opList1 +count=2 + +[system.cpu0.fuPool.FUList2.opList0] +type=OpDesc +opClass=FloatAdd +opLat=2 +issueLat=1 + +[system.cpu0.fuPool.FUList2.opList1] +type=OpDesc +opClass=FloatCmp +opLat=2 +issueLat=1 + +[system.cpu0.fuPool.FUList2.opList2] +type=OpDesc +opClass=FloatCvt +opLat=2 +issueLat=1 + +[system.cpu0.fuPool.FUList2] +type=FUDesc +opList=system.cpu0.fuPool.FUList2.opList0 system.cpu0.fuPool.FUList2.opList1 system.cpu0.fuPool.FUList2.opList2 +count=4 + +[system.cpu0.fuPool.FUList3.opList0] +type=OpDesc +opClass=FloatMult +opLat=4 +issueLat=1 + +[system.cpu0.fuPool.FUList3.opList1] +type=OpDesc +opClass=FloatDiv +opLat=12 +issueLat=12 + +[system.cpu0.fuPool.FUList3.opList2] +type=OpDesc +opClass=FloatSqrt +opLat=24 +issueLat=24 + +[system.cpu0.fuPool.FUList3] +type=FUDesc +opList=system.cpu0.fuPool.FUList3.opList0 system.cpu0.fuPool.FUList3.opList1 system.cpu0.fuPool.FUList3.opList2 +count=2 + +[system.cpu0.fuPool.FUList4.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu0.fuPool.FUList4] +type=FUDesc +opList=system.cpu0.fuPool.FUList4.opList0 +count=0 + +[system.cpu0.fuPool.FUList5.opList0] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu0.fuPool.FUList5] +type=FUDesc +opList=system.cpu0.fuPool.FUList5.opList0 +count=0 + +[system.cpu0.fuPool.FUList6.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu0.fuPool.FUList6.opList1] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu0.fuPool.FUList6] +type=FUDesc +opList=system.cpu0.fuPool.FUList6.opList0 system.cpu0.fuPool.FUList6.opList1 +count=4 + +[system.cpu0.fuPool.FUList7.opList0] +type=OpDesc +opClass=IprAccess +opLat=3 +issueLat=3 + +[system.cpu0.fuPool.FUList7] +type=FUDesc +opList=system.cpu0.fuPool.FUList7.opList0 +count=1 + +[system.cpu0.fuPool] +type=FUPool +FUList=system.cpu0.fuPool.FUList0 system.cpu0.fuPool.FUList1 system.cpu0.fuPool.FUList2 system.cpu0.fuPool.FUList3 system.cpu0.fuPool.FUList4 system.cpu0.fuPool.FUList5 system.cpu0.fuPool.FUList6 system.cpu0.fuPool.FUList7 + +[system.cpu0] +type=DerivO3CPU +clock=1 +numThreads=1 +activity=0 +workload=system.workload +mem=system.cpu0.mem +checker=system.cpu0.checker +max_insts_any_thread=500000 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +cachePorts=200 +decodeToFetchDelay=1 +renameToFetchDelay=1 +iewToFetchDelay=1 +commitToFetchDelay=1 +fetchWidth=8 +renameToDecodeDelay=1 +iewToDecodeDelay=1 +commitToDecodeDelay=1 +fetchToDecodeDelay=1 +decodeWidth=8 +iewToRenameDelay=1 +commitToRenameDelay=1 +decodeToRenameDelay=1 +renameWidth=8 +commitToIEWDelay=1 +renameToIEWDelay=2 +issueToExecuteDelay=1 +dispatchWidth=8 +issueWidth=8 +wbWidth=8 +wbDepth=1 +fuPool=system.cpu0.fuPool +iewToCommitDelay=1 +renameToROBDelay=1 +commitWidth=8 +squashWidth=8 +trapLatency=6 +backComSize=5 +forwardComSize=5 +predType=tournament +localPredictorSize=2048 +localCtrBits=2 +localHistoryTableSize=2048 +localHistoryBits=11 +globalPredictorSize=8192 +globalCtrBits=2 +globalHistoryBits=13 +choicePredictorSize=8192 +choiceCtrBits=2 +BTBEntries=4096 +BTBTagSize=16 +RASSize=16 +LQEntries=32 +SQEntries=32 +LFSTSize=1024 +SSITSize=1024 +numPhysIntRegs=256 +numPhysFloatRegs=256 +numIQEntries=64 +numROBEntries=192 +smtNumFetchingThreads=1 +smtFetchPolicy=SingleThread +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtROBPolicy=Partitioned +smtROBThreshold=100 +smtCommitPolicy=RoundRobin +instShiftAmt=2 +defer_registration=false +function_trace=false +function_trace_start=0 + +[trace] +flags= +start=0 +bufsize=0 +file=cout +dump_on_exit=false +ignore= + +[stats] +descriptions=true +project_name=test +simulation_name=test +simulation_sample=0 +text_file=m5stats.txt +text_compat=true +mysql_db= +mysql_user= +mysql_password= +mysql_host= +events_start=-1 +dump_reset=false +dump_cycle=0 +dump_period=0 +ignore_events= + +[random] +seed=1 + +[exetrace] +speculative=true +print_cycle=true +print_opclass=true +print_thread=true +print_effaddr=true +print_data=true +print_iregs=false +print_fetchseq=false +print_cpseq=false +pc_symbol=true +intel_format=false +trace_system=client + +[debug] +break_cycles= + diff --git a/tests/test1/ref/alpha/detailed/m5stats.txt b/tests/test1/ref/alpha/detailed/m5stats.txt new file mode 100644 index 000000000..8ff727085 --- /dev/null +++ b/tests/test1/ref/alpha/detailed/m5stats.txt @@ -0,0 +1,1775 @@ + +---------- Begin Simulation Statistics ---------- +global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. +global.BPredUnit.BTBHits 45390 # Number of BTB hits +global.BPredUnit.BTBLookups 59902 # Number of BTB lookups +global.BPredUnit.RASInCorrect 85 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 3098 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 46029 # Number of conditional branches predicted +global.BPredUnit.lookups 70231 # Number of BP lookups +global.BPredUnit.usedRAS 7755 # Number of times the RAS was used to get a target. +host_inst_rate 69741 # Simulator instruction rate (inst/s) +host_mem_usage 148316 # Number of bytes of host memory used +host_seconds 7.17 # Real time elapsed on the host +host_tick_rate 36160 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 15235 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 2693 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 145639 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 60928 # Number of stores inserted to the mem dependence unit. +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 500002 # Number of instructions simulated +sim_seconds 0.000000 # Number of seconds simulated +sim_ticks 259259 # Number of ticks simulated +system.cpu0.checker.numCycles 518940 # number of cpu cycles simulated +system.cpu0.commit.COM:branches 61160 # Number of branches committed +system.cpu0.commit.COM:bw_lim_events 17172 # number cycles where commit BW limit reached +system.cpu0.commit.COM:bw_limited 0 # number of insts not committed due to BW limits +system.cpu0.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle +system.cpu0.commit.COM:committed_per_cycle.samples 251997 +system.cpu0.commit.COM:committed_per_cycle.min_value 0 + 0 70509 2798.01% + 1 75489 2995.63% + 2 28876 1145.89% + 3 23224 921.60% + 4 21222 842.15% + 5 3198 126.91% + 6 8368 332.07% + 7 3939 156.31% + 8 17172 681.44% +system.cpu0.commit.COM:committed_per_cycle.max_value 8 +system.cpu0.commit.COM:committed_per_cycle.end_dist + +system.cpu0.commit.COM:count 518948 # Number of instructions committed +system.cpu0.commit.COM:loads 131376 # Number of loads committed +system.cpu0.commit.COM:membars 0 # Number of memory barriers committed +system.cpu0.commit.COM:refs 189772 # Number of memory references committed +system.cpu0.commit.COM:swp_count 0 # Number of s/w prefetches committed +system.cpu0.commit.branchMispredicts 2836 # The number of times a branch was mispredicted +system.cpu0.commit.commitCommittedInsts 518948 # The number of committed instructions +system.cpu0.commit.commitNonSpecStalls 18 # The number of times commit has been forced to stall to communicate backwards +system.cpu0.commit.commitSquashedInsts 44297 # The number of squashed insts skipped by commit +system.cpu0.committedInsts 500002 # Number of Instructions Simulated +system.cpu0.committedInsts_total 500002 # Number of Instructions Simulated +system.cpu0.cpi 0.518516 # CPI: Cycles Per Instruction +system.cpu0.cpi_total 0.518516 # CPI: Total CPI of All Threads +system.cpu0.decode.DECODE:BlockedCycles 743 # Number of cycles decode is blocked +system.cpu0.decode.DECODE:BranchMispred 281 # Number of times decode detected a branch misprediction +system.cpu0.decode.DECODE:BranchResolved 16033 # Number of times decode resolved a branch +system.cpu0.decode.DECODE:DecodedInsts 586219 # Number of instructions handled by decode +system.cpu0.decode.DECODE:IdleCycles 143055 # Number of cycles decode is idle +system.cpu0.decode.DECODE:RunCycles 108199 # Number of cycles decode is running +system.cpu0.decode.DECODE:SquashCycles 7263 # Number of cycles decode is squashing +system.cpu0.decode.DECODE:SquashedInsts 989 # Number of squashed instructions handled by decode +system.cpu0.fetch.Branches 70231 # Number of branches that fetch encountered +system.cpu0.fetch.CacheLines 71036 # Number of cache lines fetched +system.cpu0.fetch.Cycles 180480 # Number of cycles fetch has run and was not squashing or blocked +system.cpu0.fetch.IcacheSquashes 962 # Number of outstanding Icache misses that were squashed +system.cpu0.fetch.Insts 594968 # Number of instructions fetch has processed +system.cpu0.fetch.SquashCycles 3140 # Number of cycles fetch has spent squashing +system.cpu0.fetch.branchRate 0.270890 # Number of branch fetches per cycle +system.cpu0.fetch.icacheStallCycles 71036 # Number of cycles fetch is stalled on an Icache miss +system.cpu0.fetch.predictedBranches 53145 # Number of branches that fetch has predicted taken +system.cpu0.fetch.rate 2.294870 # Number of inst fetches per cycle +system.cpu0.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) +system.cpu0.fetch.rateDist.samples 259260 +system.cpu0.fetch.rateDist.min_value 0 + 0 149817 5778.64% + 1 3603 138.97% + 2 9058 349.38% + 3 10685 412.13% + 4 8455 326.12% + 5 18775 724.18% + 6 25664 989.89% + 7 6109 235.63% + 8 27094 1045.05% +system.cpu0.fetch.rateDist.max_value 8 +system.cpu0.fetch.rateDist.end_dist + +system.cpu0.iew.EXEC:branches 64672 # Number of branches executed +system.cpu0.iew.EXEC:insts 526242 # Number of executed instructions +system.cpu0.iew.EXEC:loads 140576 # Number of load instructions executed +system.cpu0.iew.EXEC:nop 19405 # number of nop insts executed +system.cpu0.iew.EXEC:rate 2.029785 # Inst execution rate +system.cpu0.iew.EXEC:refs 200121 # number of memory reference insts executed +system.cpu0.iew.EXEC:squashedInsts 5760 # Number of squashed instructions skipped in execute +system.cpu0.iew.EXEC:stores 59545 # Number of stores executed +system.cpu0.iew.EXEC:swp 0 # number of swp insts executed +system.cpu0.iew.WB:consumers 394903 # num instructions consuming a value +system.cpu0.iew.WB:count 523588 # cumulative count of insts written-back +system.cpu0.iew.WB:fanout 0.746115 # average fanout of values written-back +system.cpu0.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ +system.cpu0.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu0.iew.WB:producers 294643 # num instructions producing a value +system.cpu0.iew.WB:rate 2.019548 # insts written-back per cycle +system.cpu0.iew.WB:sent 524223 # cumulative count of insts sent to commit +system.cpu0.iew.branchMispredicts 2948 # Number of branch mispredicts detected at execute +system.cpu0.iew.iewBlockCycles 0 # Number of cycles IEW is blocking +system.cpu0.iew.iewDispLoadInsts 145639 # Number of dispatched load instructions +system.cpu0.iew.iewDispNonSpecInsts 27 # Number of dispatched non-speculative instructions +system.cpu0.iew.iewDispSquashedInsts 1523 # Number of squashed instructions skipped by dispatch +system.cpu0.iew.iewDispStoreInsts 60928 # Number of dispatched store instructions +system.cpu0.iew.iewDispatchedInsts 563297 # Number of instructions dispatched to IQ +system.cpu0.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall +system.cpu0.iew.iewIdleCycles 0 # Number of cycles IEW is idle +system.cpu0.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall +system.cpu0.iew.iewSquashCycles 7263 # Number of cycles IEW is squashing +system.cpu0.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking +system.cpu0.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding +system.cpu0.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked +system.cpu0.iew.lsq.thread.0.forwLoads 18223 # Number of loads that had data forwarded from stores +system.cpu0.iew.lsq.thread.0.ignoredResponses 9 # Number of memory responses ignored because the instruction is squashed +system.cpu0.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address +system.cpu0.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu0.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled +system.cpu0.iew.lsq.thread.0.squashedLoads 14246 # Number of loads squashed +system.cpu0.iew.lsq.thread.0.squashedStores 2528 # Number of stores squashed +system.cpu0.iew.memOrderViolationEvents 44 # Number of memory order violations +system.cpu0.iew.predictedNotTakenIncorrect 1750 # Number of branches that were predicted not taken incorrectly +system.cpu0.iew.predictedTakenIncorrect 1198 # Number of branches that were predicted taken incorrectly +system.cpu0.ipc 1.928581 # IPC: Instructions Per Cycle +system.cpu0.ipc_total 1.928581 # IPC: Total IPC of All Threads +system.cpu0.iq.IQ:residence:(null).start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:(null).samples 0 +system.cpu0.iq.IQ:residence:(null).min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:(null).max_value 0 +system.cpu0.iq.IQ:residence:(null).end_dist + +system.cpu0.iq.IQ:residence:IntAlu.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:IntAlu.samples 0 +system.cpu0.iq.IQ:residence:IntAlu.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:IntAlu.max_value 0 +system.cpu0.iq.IQ:residence:IntAlu.end_dist + +system.cpu0.iq.IQ:residence:IntMult.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:IntMult.samples 0 +system.cpu0.iq.IQ:residence:IntMult.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:IntMult.max_value 0 +system.cpu0.iq.IQ:residence:IntMult.end_dist + +system.cpu0.iq.IQ:residence:IntDiv.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:IntDiv.samples 0 +system.cpu0.iq.IQ:residence:IntDiv.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:IntDiv.max_value 0 +system.cpu0.iq.IQ:residence:IntDiv.end_dist + +system.cpu0.iq.IQ:residence:FloatAdd.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:FloatAdd.samples 0 +system.cpu0.iq.IQ:residence:FloatAdd.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:FloatAdd.max_value 0 +system.cpu0.iq.IQ:residence:FloatAdd.end_dist + +system.cpu0.iq.IQ:residence:FloatCmp.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:FloatCmp.samples 0 +system.cpu0.iq.IQ:residence:FloatCmp.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:FloatCmp.max_value 0 +system.cpu0.iq.IQ:residence:FloatCmp.end_dist + +system.cpu0.iq.IQ:residence:FloatCvt.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:FloatCvt.samples 0 +system.cpu0.iq.IQ:residence:FloatCvt.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:FloatCvt.max_value 0 +system.cpu0.iq.IQ:residence:FloatCvt.end_dist + +system.cpu0.iq.IQ:residence:FloatMult.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:FloatMult.samples 0 +system.cpu0.iq.IQ:residence:FloatMult.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:FloatMult.max_value 0 +system.cpu0.iq.IQ:residence:FloatMult.end_dist + +system.cpu0.iq.IQ:residence:FloatDiv.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:FloatDiv.samples 0 +system.cpu0.iq.IQ:residence:FloatDiv.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:FloatDiv.max_value 0 +system.cpu0.iq.IQ:residence:FloatDiv.end_dist + +system.cpu0.iq.IQ:residence:FloatSqrt.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:FloatSqrt.samples 0 +system.cpu0.iq.IQ:residence:FloatSqrt.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:FloatSqrt.max_value 0 +system.cpu0.iq.IQ:residence:FloatSqrt.end_dist + +system.cpu0.iq.IQ:residence:MemRead.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:MemRead.samples 0 +system.cpu0.iq.IQ:residence:MemRead.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:MemRead.max_value 0 +system.cpu0.iq.IQ:residence:MemRead.end_dist + +system.cpu0.iq.IQ:residence:MemWrite.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:MemWrite.samples 0 +system.cpu0.iq.IQ:residence:MemWrite.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:MemWrite.max_value 0 +system.cpu0.iq.IQ:residence:MemWrite.end_dist + +system.cpu0.iq.IQ:residence:IprAccess.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:IprAccess.samples 0 +system.cpu0.iq.IQ:residence:IprAccess.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:IprAccess.max_value 0 +system.cpu0.iq.IQ:residence:IprAccess.end_dist + +system.cpu0.iq.IQ:residence:InstPrefetch.start_dist # cycles from dispatch to issue +system.cpu0.iq.IQ:residence:InstPrefetch.samples 0 +system.cpu0.iq.IQ:residence:InstPrefetch.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.IQ:residence:InstPrefetch.max_value 0 +system.cpu0.iq.IQ:residence:InstPrefetch.end_dist + +system.cpu0.iq.ISSUE:(null)_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:(null)_delay.samples 0 +system.cpu0.iq.ISSUE:(null)_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:(null)_delay.max_value 0 +system.cpu0.iq.ISSUE:(null)_delay.end_dist + +system.cpu0.iq.ISSUE:IntAlu_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:IntAlu_delay.samples 0 +system.cpu0.iq.ISSUE:IntAlu_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:IntAlu_delay.max_value 0 +system.cpu0.iq.ISSUE:IntAlu_delay.end_dist + +system.cpu0.iq.ISSUE:IntMult_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:IntMult_delay.samples 0 +system.cpu0.iq.ISSUE:IntMult_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:IntMult_delay.max_value 0 +system.cpu0.iq.ISSUE:IntMult_delay.end_dist + +system.cpu0.iq.ISSUE:IntDiv_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:IntDiv_delay.samples 0 +system.cpu0.iq.ISSUE:IntDiv_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:IntDiv_delay.max_value 0 +system.cpu0.iq.ISSUE:IntDiv_delay.end_dist + +system.cpu0.iq.ISSUE:FloatAdd_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:FloatAdd_delay.samples 0 +system.cpu0.iq.ISSUE:FloatAdd_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:FloatAdd_delay.max_value 0 +system.cpu0.iq.ISSUE:FloatAdd_delay.end_dist + +system.cpu0.iq.ISSUE:FloatCmp_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:FloatCmp_delay.samples 0 +system.cpu0.iq.ISSUE:FloatCmp_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:FloatCmp_delay.max_value 0 +system.cpu0.iq.ISSUE:FloatCmp_delay.end_dist + +system.cpu0.iq.ISSUE:FloatCvt_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:FloatCvt_delay.samples 0 +system.cpu0.iq.ISSUE:FloatCvt_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:FloatCvt_delay.max_value 0 +system.cpu0.iq.ISSUE:FloatCvt_delay.end_dist + +system.cpu0.iq.ISSUE:FloatMult_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:FloatMult_delay.samples 0 +system.cpu0.iq.ISSUE:FloatMult_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:FloatMult_delay.max_value 0 +system.cpu0.iq.ISSUE:FloatMult_delay.end_dist + +system.cpu0.iq.ISSUE:FloatDiv_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:FloatDiv_delay.samples 0 +system.cpu0.iq.ISSUE:FloatDiv_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:FloatDiv_delay.max_value 0 +system.cpu0.iq.ISSUE:FloatDiv_delay.end_dist + +system.cpu0.iq.ISSUE:FloatSqrt_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:FloatSqrt_delay.samples 0 +system.cpu0.iq.ISSUE:FloatSqrt_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:FloatSqrt_delay.max_value 0 +system.cpu0.iq.ISSUE:FloatSqrt_delay.end_dist + +system.cpu0.iq.ISSUE:MemRead_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:MemRead_delay.samples 0 +system.cpu0.iq.ISSUE:MemRead_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:MemRead_delay.max_value 0 +system.cpu0.iq.ISSUE:MemRead_delay.end_dist + +system.cpu0.iq.ISSUE:MemWrite_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:MemWrite_delay.samples 0 +system.cpu0.iq.ISSUE:MemWrite_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:MemWrite_delay.max_value 0 +system.cpu0.iq.ISSUE:MemWrite_delay.end_dist + +system.cpu0.iq.ISSUE:IprAccess_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:IprAccess_delay.samples 0 +system.cpu0.iq.ISSUE:IprAccess_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:IprAccess_delay.max_value 0 +system.cpu0.iq.ISSUE:IprAccess_delay.end_dist + +system.cpu0.iq.ISSUE:InstPrefetch_delay.start_dist # cycles from operands ready to issue +system.cpu0.iq.ISSUE:InstPrefetch_delay.samples 0 +system.cpu0.iq.ISSUE:InstPrefetch_delay.min_value 0 + 0 0 + 2 0 + 4 0 + 6 0 + 8 0 + 10 0 + 12 0 + 14 0 + 16 0 + 18 0 + 20 0 + 22 0 + 24 0 + 26 0 + 28 0 + 30 0 + 32 0 + 34 0 + 36 0 + 38 0 + 40 0 + 42 0 + 44 0 + 46 0 + 48 0 + 50 0 + 52 0 + 54 0 + 56 0 + 58 0 + 60 0 + 62 0 + 64 0 + 66 0 + 68 0 + 70 0 + 72 0 + 74 0 + 76 0 + 78 0 + 80 0 + 82 0 + 84 0 + 86 0 + 88 0 + 90 0 + 92 0 + 94 0 + 96 0 + 98 0 +system.cpu0.iq.ISSUE:InstPrefetch_delay.max_value 0 +system.cpu0.iq.ISSUE:InstPrefetch_delay.end_dist + +system.cpu0.iq.ISSUE:FU_type_0 532005 # Type of FU issued +system.cpu0.iq.ISSUE:FU_type_0.start_dist + (null) 0 0.00% # Type of FU issued + IntAlu 329259 61.89% # Type of FU issued + IntMult 10 0.00% # Type of FU issued + IntDiv 0 0.00% # Type of FU issued + FloatAdd 13 0.00% # Type of FU issued + FloatCmp 3 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 2 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 142868 26.85% # Type of FU issued + MemWrite 59850 11.25% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu0.iq.ISSUE:FU_type_0.end_dist +system.cpu0.iq.ISSUE:fu_busy_cnt 5510 # FU busy when requested +system.cpu0.iq.ISSUE:fu_busy_rate 0.010357 # FU busy rate (busy events/executed inst) +system.cpu0.iq.ISSUE:fu_full.start_dist + (null) 0 0.00% # attempts to use FU when none available + IntAlu 1663 30.18% # attempts to use FU when none available + IntMult 0 0.00% # attempts to use FU when none available + IntDiv 0 0.00% # attempts to use FU when none available + FloatAdd 0 0.00% # attempts to use FU when none available + FloatCmp 0 0.00% # attempts to use FU when none available + FloatCvt 0 0.00% # attempts to use FU when none available + FloatMult 0 0.00% # attempts to use FU when none available + FloatDiv 0 0.00% # attempts to use FU when none available + FloatSqrt 0 0.00% # attempts to use FU when none available + MemRead 2693 48.87% # attempts to use FU when none available + MemWrite 1154 20.94% # attempts to use FU when none available + IprAccess 0 0.00% # attempts to use FU when none available + InstPrefetch 0 0.00% # attempts to use FU when none available +system.cpu0.iq.ISSUE:fu_full.end_dist +system.cpu0.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle +system.cpu0.iq.ISSUE:issued_per_cycle.samples 259260 +system.cpu0.iq.ISSUE:issued_per_cycle.min_value 0 + 0 59185 2282.84% + 1 72964 2814.32% + 2 38364 1479.75% + 3 33144 1278.41% + 4 19818 764.41% + 5 14624 564.07% + 6 18233 703.27% + 7 2333 89.99% + 8 595 22.95% +system.cpu0.iq.ISSUE:issued_per_cycle.max_value 8 +system.cpu0.iq.ISSUE:issued_per_cycle.end_dist + +system.cpu0.iq.ISSUE:rate 2.052013 # Inst issue rate +system.cpu0.iq.iqInstsAdded 543865 # Number of instructions added to the IQ (excludes non-spec) +system.cpu0.iq.iqInstsIssued 532005 # Number of instructions issued +system.cpu0.iq.iqNonSpecInstsAdded 27 # Number of non-speculative instructions added to the IQ +system.cpu0.iq.iqSquashedInstsExamined 42716 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu0.iq.iqSquashedInstsIssued 611 # Number of squashed instructions issued +system.cpu0.iq.iqSquashedNonSpecRemoved 9 # Number of squashed non-spec instructions that were removed +system.cpu0.iq.iqSquashedOperandsExamined 21818 # Number of squashed operands that are examined and possibly removed from graph +system.cpu0.numCycles 259260 # number of cpu cycles simulated +system.cpu0.rename.RENAME:BlockCycles 191 # Number of cycles rename is blocking +system.cpu0.rename.RENAME:CommittedMaps 386063 # Number of HB maps that are committed +system.cpu0.rename.RENAME:IdleCycles 144885 # Number of cycles rename is idle +system.cpu0.rename.RENAME:LSQFullEvents 336 # Number of times rename has blocked due to LSQ full +system.cpu0.rename.RENAME:RenameLookups 753146 # Number of register rename lookups that rename has made +system.cpu0.rename.RENAME:RenamedInsts 577319 # Number of instructions processed by rename +system.cpu0.rename.RENAME:RenamedOperands 432146 # Number of destination operands rename has renamed +system.cpu0.rename.RENAME:RunCycles 106374 # Number of cycles rename is running +system.cpu0.rename.RENAME:SquashCycles 7263 # Number of cycles rename is squashing +system.cpu0.rename.RENAME:UnblockCycles 302 # Number of cycles rename is unblocking +system.cpu0.rename.RENAME:UndoneMaps 46034 # Number of HB maps that are undone due to squashing +system.cpu0.rename.RENAME:serializeStallCycles 245 # count of cycles rename stalled for serializing inst +system.cpu0.rename.RENAME:serializingInsts 34 # count of serializing insts renamed +system.cpu0.rename.RENAME:skidInsts 421 # count of insts added to the skid buffer +system.cpu0.rename.RENAME:tempSerializingInsts 32 # count of temporary serializing insts renamed +system.workload.PROG:num_syscalls 18 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/test1/ref/alpha/detailed/stderr b/tests/test1/ref/alpha/detailed/stderr new file mode 100644 index 000000000..2b973e482 --- /dev/null +++ b/tests/test1/ref/alpha/detailed/stderr @@ -0,0 +1,4 @@ +warn: Entering event queue @ 0. Starting simulation... +warn: cycle 0: fault (1) detected @ PC 0x000000 + +gzip: stdout: Broken pipe diff --git a/tests/test1/ref/alpha/detailed/stdout b/tests/test1/ref/alpha/detailed/stdout new file mode 100644 index 000000000..2c46aa4f2 --- /dev/null +++ b/tests/test1/ref/alpha/detailed/stdout @@ -0,0 +1,14 @@ +main dictionary has 1245 entries +49508 bytes wasted +>M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Jul 19 2006 15:49:01 +M5 started Wed Jul 19 15:49:12 2006 +M5 executing on zamp.eecs.umich.edu +Creating SE system +Exiting @ tick 259259 because a thread reached the max instruction count diff --git a/tests/test1/ref/alpha/timing/config.ini b/tests/test1/ref/alpha/timing/config.ini new file mode 100644 index 000000000..58dc6741b --- /dev/null +++ b/tests/test1/ref/alpha/timing/config.ini @@ -0,0 +1,93 @@ +[root] +type=Root +children=system +checkpoint= +clock=1000000000000 +max_tick=0 +output_file=cout +progress_interval=0 + +[debug] +break_cycles= + +[exetrace] +intel_format=false +pc_symbol=true +print_cpseq=false +print_cycle=true +print_data=true +print_effaddr=true +print_fetchseq=false +print_iregs=false +print_opclass=true +print_thread=true +speculative=true +trace_system=client + +[serialize] +count=10 +cycle=0 +dir=cpt.%012d +period=0 + +[stats] +descriptions=true +dump_cycle=0 +dump_period=0 +dump_reset=false +ignore_events= +mysql_db= +mysql_host= +mysql_password= +mysql_user= +project_name=test +simulation_name=test +simulation_sample=0 +text_compat=true +text_file=m5stats.txt + +[system] +type=System +children=cpu0 physmem workload +mem_mode=atomic +physmem=system.physmem + +[system.cpu0] +type=TimingSimpleCPU +children=mem +clock=1 +defer_registration=false +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=500000 +max_loads_all_threads=0 +max_loads_any_thread=0 +mem=system.cpu0.mem +system=system +workload=system.workload + +[system.cpu0.mem] +type=Bus +bus_id=0 + +[system.physmem] +type=PhysicalMemory +file= +latency=1 + +[system.workload] +type=EioProcess +chkpt= +file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz +output=cout +system=system + +[trace] +bufsize=0 +dump_on_exit=false +file=cout +flags= +ignore= +start=0 + diff --git a/tests/test1/ref/alpha/timing/config.out b/tests/test1/ref/alpha/timing/config.out new file mode 100644 index 000000000..b28de6f74 --- /dev/null +++ b/tests/test1/ref/alpha/timing/config.out @@ -0,0 +1,90 @@ +[root] +type=Root +clock=1000000000000 +max_tick=0 +progress_interval=0 +output_file=cout + +[system.physmem] +type=PhysicalMemory +file= +// range not specified +latency=1 + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.workload] +type=EioProcess +file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz +chkpt= +output=cout +system=system + +[system.cpu0.mem] +type=Bus +bus_id=0 + +[system.cpu0] +type=TimingSimpleCPU +max_insts_any_thread=500000 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +mem=system.cpu0.mem +system=system +workload=system.workload +clock=1 +defer_registration=false +// width not specified +function_trace=false +function_trace_start=0 +// simulate_stalls not specified + +[trace] +flags= +start=0 +bufsize=0 +file=cout +dump_on_exit=false +ignore= + +[stats] +descriptions=true +project_name=test +simulation_name=test +simulation_sample=0 +text_file=m5stats.txt +text_compat=true +mysql_db= +mysql_user= +mysql_password= +mysql_host= +events_start=-1 +dump_reset=false +dump_cycle=0 +dump_period=0 +ignore_events= + +[random] +seed=1 + +[exetrace] +speculative=true +print_cycle=true +print_opclass=true +print_thread=true +print_effaddr=true +print_data=true +print_iregs=false +print_fetchseq=false +print_cpseq=false +pc_symbol=true +intel_format=false +trace_system=client + +[debug] +break_cycles= + diff --git a/tests/test1/ref/alpha/timing/m5stats.txt b/tests/test1/ref/alpha/timing/m5stats.txt new file mode 100644 index 000000000..64d05099f --- /dev/null +++ b/tests/test1/ref/alpha/timing/m5stats.txt @@ -0,0 +1,18 @@ + +---------- Begin Simulation Statistics ---------- +host_inst_rate 739858 # Simulator instruction rate (inst/s) +host_mem_usage 147760 # Number of bytes of host memory used +host_seconds 0.68 # Real time elapsed on the host +host_tick_rate 1006609 # Simulator tick rate (ticks/s) +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 500000 # Number of instructions simulated +sim_seconds 0.000001 # Number of seconds simulated +sim_ticks 680774 # Number of ticks simulated +system.cpu0.idle_fraction 0 # Percentage of idle cycles +system.cpu0.not_idle_fraction 1 # Percentage of non-idle cycles +system.cpu0.numCycles 0 # number of cpu cycles simulated +system.cpu0.num_insts 500000 # Number of instructions executed +system.cpu0.num_refs 182203 # Number of memory references +system.workload.PROG:num_syscalls 18 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/test1/ref/alpha/timing/stderr b/tests/test1/ref/alpha/timing/stderr new file mode 100644 index 000000000..4e444fa6b --- /dev/null +++ b/tests/test1/ref/alpha/timing/stderr @@ -0,0 +1,3 @@ +warn: Entering event queue @ 0. Starting simulation... + +gzip: stdout: Broken pipe diff --git a/tests/test1/ref/alpha/timing/stdout b/tests/test1/ref/alpha/timing/stdout new file mode 100644 index 000000000..980af1477 --- /dev/null +++ b/tests/test1/ref/alpha/timing/stdout @@ -0,0 +1,14 @@ +main dictionary has 1245 entries +49508 bytes wasted +>M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Jul 19 2006 15:49:01 +M5 started Wed Jul 19 15:49:19 2006 +M5 executing on zamp.eecs.umich.edu +Creating SE system +Exiting @ tick 680774 because a thread reached the max instruction count |