diff options
author | Steve Reinhardt <stever@eecs.umich.edu> | 2006-05-22 14:29:33 -0400 |
---|---|---|
committer | Steve Reinhardt <stever@eecs.umich.edu> | 2006-05-22 14:29:33 -0400 |
commit | ba2eae5d528487900d1510fc0a160e660f2c394c (patch) | |
tree | a2c6dd5948f6ff353763cb3f83ddd734077e646e /src/cpu | |
parent | 86777c9db174c74be49667bce3dda99f8ba23696 (diff) | |
download | gem5-ba2eae5d528487900d1510fc0a160e660f2c394c.tar.xz |
New directory structure:
- simulator source now in 'src' subdirectory
- imported files from 'ext' repository
- support building in arbitrary places, including
outside of the source tree. See comment at top
of SConstruct file for more details.
Regression tests are temporarily disabled; that
syetem needs more extensive revisions.
SConstruct:
Update for new directory structure.
Modify to support build trees that are not subdirectories
of the source tree. See comment at top of file for
more details.
Regression tests are temporarily disabled.
src/arch/SConscript:
src/arch/isa_parser.py:
src/python/SConscript:
Update for new directory structure.
--HG--
rename : build/SConstruct => SConstruct
rename : build/default_options/ALPHA_FS => build_opts/ALPHA_FS
rename : build/default_options/ALPHA_FS_TL => build_opts/ALPHA_FS_TL
rename : build/default_options/ALPHA_SE => build_opts/ALPHA_SE
rename : build/default_options/MIPS_SE => build_opts/MIPS_SE
rename : build/default_options/SPARC_SE => build_opts/SPARC_SE
rename : Doxyfile => src/Doxyfile
rename : SConscript => src/SConscript
rename : arch/SConscript => src/arch/SConscript
rename : arch/alpha/SConscript => src/arch/alpha/SConscript
rename : arch/alpha/aout_machdep.h => src/arch/alpha/aout_machdep.h
rename : arch/alpha/arguments.cc => src/arch/alpha/arguments.cc
rename : arch/alpha/arguments.hh => src/arch/alpha/arguments.hh
rename : arch/alpha/ecoff_machdep.h => src/arch/alpha/ecoff_machdep.h
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/ev5.hh => src/arch/alpha/ev5.hh
rename : arch/alpha/faults.cc => src/arch/alpha/faults.cc
rename : arch/alpha/faults.hh => src/arch/alpha/faults.hh
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/freebsd/system.hh => src/arch/alpha/freebsd/system.hh
rename : arch/alpha/isa/branch.isa => src/arch/alpha/isa/branch.isa
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/fp.isa => src/arch/alpha/isa/fp.isa
rename : arch/alpha/isa/int.isa => src/arch/alpha/isa/int.isa
rename : arch/alpha/isa/main.isa => src/arch/alpha/isa/main.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa/opcdec.isa => src/arch/alpha/isa/opcdec.isa
rename : arch/alpha/isa/pal.isa => src/arch/alpha/isa/pal.isa
rename : arch/alpha/isa/unimp.isa => src/arch/alpha/isa/unimp.isa
rename : arch/alpha/isa/unknown.isa => src/arch/alpha/isa/unknown.isa
rename : arch/alpha/isa/util.isa => src/arch/alpha/isa/util.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/aligned.hh => src/arch/alpha/linux/aligned.hh
rename : arch/alpha/linux/hwrpb.hh => src/arch/alpha/linux/hwrpb.hh
rename : arch/alpha/linux/linux.cc => src/arch/alpha/linux/linux.cc
rename : arch/alpha/linux/linux.hh => src/arch/alpha/linux/linux.hh
rename : arch/alpha/linux/process.cc => src/arch/alpha/linux/process.cc
rename : arch/alpha/linux/process.hh => src/arch/alpha/linux/process.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/linux/system.hh => src/arch/alpha/linux/system.hh
rename : arch/alpha/linux/thread_info.hh => src/arch/alpha/linux/thread_info.hh
rename : arch/alpha/linux/threadinfo.hh => src/arch/alpha/linux/threadinfo.hh
rename : arch/alpha/osfpal.cc => src/arch/alpha/osfpal.cc
rename : arch/alpha/osfpal.hh => src/arch/alpha/osfpal.hh
rename : arch/alpha/process.cc => src/arch/alpha/process.cc
rename : arch/alpha/process.hh => src/arch/alpha/process.hh
rename : arch/alpha/regfile.hh => src/arch/alpha/regfile.hh
rename : arch/alpha/stacktrace.cc => src/arch/alpha/stacktrace.cc
rename : arch/alpha/stacktrace.hh => src/arch/alpha/stacktrace.hh
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/system.hh => src/arch/alpha/system.hh
rename : arch/alpha/tlb.cc => src/arch/alpha/tlb.cc
rename : arch/alpha/tlb.hh => src/arch/alpha/tlb.hh
rename : arch/alpha/tru64/process.cc => src/arch/alpha/tru64/process.cc
rename : arch/alpha/tru64/process.hh => src/arch/alpha/tru64/process.hh
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : arch/alpha/tru64/system.hh => src/arch/alpha/tru64/system.hh
rename : arch/alpha/tru64/tru64.cc => src/arch/alpha/tru64/tru64.cc
rename : arch/alpha/tru64/tru64.hh => src/arch/alpha/tru64/tru64.hh
rename : arch/alpha/types.hh => src/arch/alpha/types.hh
rename : arch/alpha/utility.hh => src/arch/alpha/utility.hh
rename : arch/alpha/vtophys.cc => src/arch/alpha/vtophys.cc
rename : arch/alpha/vtophys.hh => src/arch/alpha/vtophys.hh
rename : arch/isa_parser.py => src/arch/isa_parser.py
rename : arch/isa_specific.hh => src/arch/isa_specific.hh
rename : arch/mips/SConscript => src/arch/mips/SConscript
rename : arch/mips/faults.cc => src/arch/mips/faults.cc
rename : arch/mips/faults.hh => src/arch/mips/faults.hh
rename : arch/mips/isa/base.isa => src/arch/mips/isa/base.isa
rename : arch/mips/isa/bitfields.isa => src/arch/mips/isa/bitfields.isa
rename : arch/mips/isa/decoder.isa => src/arch/mips/isa/decoder.isa
rename : arch/mips/isa/formats/basic.isa => src/arch/mips/isa/formats/basic.isa
rename : arch/mips/isa/formats/branch.isa => src/arch/mips/isa/formats/branch.isa
rename : arch/mips/isa/formats/formats.isa => src/arch/mips/isa/formats/formats.isa
rename : arch/mips/isa/formats/fp.isa => src/arch/mips/isa/formats/fp.isa
rename : arch/mips/isa/formats/int.isa => src/arch/mips/isa/formats/int.isa
rename : arch/mips/isa/formats/mem.isa => src/arch/mips/isa/formats/mem.isa
rename : arch/mips/isa/formats/noop.isa => src/arch/mips/isa/formats/noop.isa
rename : arch/mips/isa/formats/tlbop.isa => src/arch/mips/isa/formats/tlbop.isa
rename : arch/mips/isa/formats/trap.isa => src/arch/mips/isa/formats/trap.isa
rename : arch/mips/isa/formats/unimp.isa => src/arch/mips/isa/formats/unimp.isa
rename : arch/mips/isa/formats/unknown.isa => src/arch/mips/isa/formats/unknown.isa
rename : arch/mips/isa/formats/util.isa => src/arch/mips/isa/formats/util.isa
rename : arch/mips/isa/includes.isa => src/arch/mips/isa/includes.isa
rename : arch/mips/isa/main.isa => src/arch/mips/isa/main.isa
rename : arch/mips/isa/operands.isa => src/arch/mips/isa/operands.isa
rename : arch/mips/isa_traits.cc => src/arch/mips/isa_traits.cc
rename : arch/mips/isa_traits.hh => src/arch/mips/isa_traits.hh
rename : arch/mips/linux/linux.cc => src/arch/mips/linux/linux.cc
rename : arch/mips/linux/linux.hh => src/arch/mips/linux/linux.hh
rename : arch/mips/linux/process.cc => src/arch/mips/linux/process.cc
rename : arch/mips/linux/process.hh => src/arch/mips/linux/process.hh
rename : arch/mips/process.cc => src/arch/mips/process.cc
rename : arch/mips/process.hh => src/arch/mips/process.hh
rename : arch/mips/regfile/float_regfile.hh => src/arch/mips/regfile/float_regfile.hh
rename : arch/mips/regfile/int_regfile.hh => src/arch/mips/regfile/int_regfile.hh
rename : arch/mips/regfile/misc_regfile.hh => src/arch/mips/regfile/misc_regfile.hh
rename : arch/mips/regfile/regfile.hh => src/arch/mips/regfile/regfile.hh
rename : arch/mips/stacktrace.hh => src/arch/mips/stacktrace.hh
rename : arch/mips/types.hh => src/arch/mips/types.hh
rename : arch/mips/utility.hh => src/arch/mips/utility.hh
rename : arch/sparc/SConscript => src/arch/sparc/SConscript
rename : arch/sparc/faults.cc => src/arch/sparc/faults.cc
rename : arch/sparc/faults.hh => src/arch/sparc/faults.hh
rename : arch/sparc/isa/base.isa => src/arch/sparc/isa/base.isa
rename : arch/sparc/isa/bitfields.isa => src/arch/sparc/isa/bitfields.isa
rename : arch/sparc/isa/decoder.isa => src/arch/sparc/isa/decoder.isa
rename : arch/sparc/isa/formats.isa => src/arch/sparc/isa/formats.isa
rename : arch/sparc/isa/formats/basic.isa => src/arch/sparc/isa/formats/basic.isa
rename : arch/sparc/isa/formats/branch.isa => src/arch/sparc/isa/formats/branch.isa
rename : arch/sparc/isa/formats/integerop.isa => src/arch/sparc/isa/formats/integerop.isa
rename : arch/sparc/isa/formats/mem.isa => src/arch/sparc/isa/formats/mem.isa
rename : arch/sparc/isa/formats/nop.isa => src/arch/sparc/isa/formats/nop.isa
rename : arch/sparc/isa/formats/priv.isa => src/arch/sparc/isa/formats/priv.isa
rename : arch/sparc/isa/formats/trap.isa => src/arch/sparc/isa/formats/trap.isa
rename : arch/sparc/isa/formats/unknown.isa => src/arch/sparc/isa/formats/unknown.isa
rename : arch/sparc/isa/includes.isa => src/arch/sparc/isa/includes.isa
rename : arch/sparc/isa/main.isa => src/arch/sparc/isa/main.isa
rename : arch/sparc/isa/operands.isa => src/arch/sparc/isa/operands.isa
rename : arch/sparc/isa_traits.hh => src/arch/sparc/isa_traits.hh
rename : arch/sparc/linux/linux.cc => src/arch/sparc/linux/linux.cc
rename : arch/sparc/linux/linux.hh => src/arch/sparc/linux/linux.hh
rename : arch/sparc/linux/process.cc => src/arch/sparc/linux/process.cc
rename : arch/sparc/linux/process.hh => src/arch/sparc/linux/process.hh
rename : arch/sparc/process.cc => src/arch/sparc/process.cc
rename : arch/sparc/process.hh => src/arch/sparc/process.hh
rename : arch/sparc/regfile.hh => src/arch/sparc/regfile.hh
rename : arch/sparc/solaris/process.cc => src/arch/sparc/solaris/process.cc
rename : arch/sparc/solaris/process.hh => src/arch/sparc/solaris/process.hh
rename : arch/sparc/solaris/solaris.cc => src/arch/sparc/solaris/solaris.cc
rename : arch/sparc/solaris/solaris.hh => src/arch/sparc/solaris/solaris.hh
rename : arch/sparc/stacktrace.hh => src/arch/sparc/stacktrace.hh
rename : arch/sparc/system.cc => src/arch/sparc/system.cc
rename : arch/sparc/system.hh => src/arch/sparc/system.hh
rename : arch/sparc/utility.hh => src/arch/sparc/utility.hh
rename : base/bitfield.hh => src/base/bitfield.hh
rename : base/callback.hh => src/base/callback.hh
rename : base/chunk_generator.hh => src/base/chunk_generator.hh
rename : base/circlebuf.cc => src/base/circlebuf.cc
rename : base/circlebuf.hh => src/base/circlebuf.hh
rename : base/compression/lzss_compression.cc => src/base/compression/lzss_compression.cc
rename : base/compression/lzss_compression.hh => src/base/compression/lzss_compression.hh
rename : base/compression/null_compression.hh => src/base/compression/null_compression.hh
rename : base/cprintf.cc => src/base/cprintf.cc
rename : base/cprintf.hh => src/base/cprintf.hh
rename : base/cprintf_formats.hh => src/base/cprintf_formats.hh
rename : base/crc.cc => src/base/crc.cc
rename : base/crc.hh => src/base/crc.hh
rename : base/date.cc => src/base/date.cc
rename : base/dbl_list.hh => src/base/dbl_list.hh
rename : base/endian.hh => src/base/endian.hh
rename : base/fast_alloc.cc => src/base/fast_alloc.cc
rename : base/fast_alloc.hh => src/base/fast_alloc.hh
rename : base/fenv.hh => src/base/fenv.hh
rename : base/fifo_buffer.cc => src/base/fifo_buffer.cc
rename : base/fifo_buffer.hh => src/base/fifo_buffer.hh
rename : base/hashmap.hh => src/base/hashmap.hh
rename : base/hostinfo.cc => src/base/hostinfo.cc
rename : base/hostinfo.hh => src/base/hostinfo.hh
rename : base/hybrid_pred.cc => src/base/hybrid_pred.cc
rename : base/hybrid_pred.hh => src/base/hybrid_pred.hh
rename : base/inet.cc => src/base/inet.cc
rename : base/inet.hh => src/base/inet.hh
rename : base/inifile.cc => src/base/inifile.cc
rename : base/inifile.hh => src/base/inifile.hh
rename : base/intmath.cc => src/base/intmath.cc
rename : base/intmath.hh => src/base/intmath.hh
rename : base/kgdb.h => src/base/kgdb.h
rename : base/loader/aout_object.cc => src/base/loader/aout_object.cc
rename : base/loader/aout_object.hh => src/base/loader/aout_object.hh
rename : base/loader/coff_sym.h => src/base/loader/coff_sym.h
rename : base/loader/coff_symconst.h => src/base/loader/coff_symconst.h
rename : base/loader/ecoff_object.cc => src/base/loader/ecoff_object.cc
rename : base/loader/ecoff_object.hh => src/base/loader/ecoff_object.hh
rename : base/loader/elf_object.cc => src/base/loader/elf_object.cc
rename : base/loader/elf_object.hh => src/base/loader/elf_object.hh
rename : base/loader/exec_aout.h => src/base/loader/exec_aout.h
rename : base/loader/exec_ecoff.h => src/base/loader/exec_ecoff.h
rename : base/loader/object_file.cc => src/base/loader/object_file.cc
rename : base/loader/object_file.hh => src/base/loader/object_file.hh
rename : base/loader/symtab.cc => src/base/loader/symtab.cc
rename : base/loader/symtab.hh => src/base/loader/symtab.hh
rename : base/match.cc => src/base/match.cc
rename : base/match.hh => src/base/match.hh
rename : base/misc.cc => src/base/misc.cc
rename : base/misc.hh => src/base/misc.hh
rename : base/mod_num.hh => src/base/mod_num.hh
rename : base/mysql.cc => src/base/mysql.cc
rename : base/mysql.hh => src/base/mysql.hh
rename : base/output.cc => src/base/output.cc
rename : base/output.hh => src/base/output.hh
rename : base/pollevent.cc => src/base/pollevent.cc
rename : base/pollevent.hh => src/base/pollevent.hh
rename : base/predictor.hh => src/base/predictor.hh
rename : base/random.cc => src/base/random.cc
rename : base/random.hh => src/base/random.hh
rename : base/range.cc => src/base/range.cc
rename : base/range.hh => src/base/range.hh
rename : base/refcnt.hh => src/base/refcnt.hh
rename : base/remote_gdb.cc => src/base/remote_gdb.cc
rename : base/remote_gdb.hh => src/base/remote_gdb.hh
rename : base/res_list.hh => src/base/res_list.hh
rename : base/sat_counter.cc => src/base/sat_counter.cc
rename : base/sat_counter.hh => src/base/sat_counter.hh
rename : base/sched_list.hh => src/base/sched_list.hh
rename : base/socket.cc => src/base/socket.cc
rename : base/socket.hh => src/base/socket.hh
rename : base/statistics.cc => src/base/statistics.cc
rename : base/statistics.hh => src/base/statistics.hh
rename : base/stats/events.cc => src/base/stats/events.cc
rename : base/stats/events.hh => src/base/stats/events.hh
rename : base/stats/flags.hh => src/base/stats/flags.hh
rename : base/stats/mysql.cc => src/base/stats/mysql.cc
rename : base/stats/mysql.hh => src/base/stats/mysql.hh
rename : base/stats/mysql_run.hh => src/base/stats/mysql_run.hh
rename : base/stats/output.hh => src/base/stats/output.hh
rename : base/stats/statdb.cc => src/base/stats/statdb.cc
rename : base/stats/statdb.hh => src/base/stats/statdb.hh
rename : base/stats/text.cc => src/base/stats/text.cc
rename : base/stats/text.hh => src/base/stats/text.hh
rename : base/stats/types.hh => src/base/stats/types.hh
rename : base/stats/visit.cc => src/base/stats/visit.cc
rename : base/stats/visit.hh => src/base/stats/visit.hh
rename : base/str.cc => src/base/str.cc
rename : base/str.hh => src/base/str.hh
rename : base/time.cc => src/base/time.cc
rename : base/time.hh => src/base/time.hh
rename : base/timebuf.hh => src/base/timebuf.hh
rename : base/trace.cc => src/base/trace.cc
rename : base/trace.hh => src/base/trace.hh
rename : base/traceflags.py => src/base/traceflags.py
rename : base/userinfo.cc => src/base/userinfo.cc
rename : base/userinfo.hh => src/base/userinfo.hh
rename : cpu/SConscript => src/cpu/SConscript
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/cpu_exec_context.cc => src/cpu/cpu_exec_context.cc
rename : cpu/cpu_exec_context.hh => src/cpu/cpu_exec_context.hh
rename : cpu/cpu_models.py => src/cpu/cpu_models.py
rename : cpu/exec_context.hh => src/cpu/exec_context.hh
rename : cpu/exetrace.cc => src/cpu/exetrace.cc
rename : cpu/exetrace.hh => src/cpu/exetrace.hh
rename : cpu/inst_seq.hh => src/cpu/inst_seq.hh
rename : cpu/intr_control.cc => src/cpu/intr_control.cc
rename : cpu/intr_control.hh => src/cpu/intr_control.hh
rename : cpu/memtest/memtest.cc => src/cpu/memtest/memtest.cc
rename : cpu/memtest/memtest.hh => src/cpu/memtest/memtest.hh
rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc
rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh
rename : cpu/o3/alpha_cpu.cc => src/cpu/o3/alpha_cpu.cc
rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc
rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh
rename : cpu/o3/alpha_dyn_inst.cc => src/cpu/o3/alpha_dyn_inst.cc
rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh
rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh
rename : cpu/o3/alpha_impl.hh => src/cpu/o3/alpha_impl.hh
rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh
rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc
rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh
rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh
rename : cpu/o3/btb.cc => src/cpu/o3/btb.cc
rename : cpu/o3/btb.hh => src/cpu/o3/btb.hh
rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh
rename : cpu/o3/commit.cc => src/cpu/o3/commit.cc
rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh
rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh
rename : cpu/o3/decode.cc => src/cpu/o3/decode.cc
rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh
rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh
rename : cpu/o3/fetch.cc => src/cpu/o3/fetch.cc
rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/free_list.cc => src/cpu/o3/free_list.cc
rename : cpu/o3/free_list.hh => src/cpu/o3/free_list.hh
rename : cpu/o3/iew.cc => src/cpu/o3/iew.cc
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.cc => src/cpu/o3/inst_queue.cc
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/mem_dep_unit.cc => src/cpu/o3/mem_dep_unit.cc
rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/ras.cc => src/cpu/o3/ras.cc
rename : cpu/o3/ras.hh => src/cpu/o3/ras.hh
rename : cpu/o3/regfile.hh => src/cpu/o3/regfile.hh
rename : cpu/o3/rename.cc => src/cpu/o3/rename.cc
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/rename_map.cc => src/cpu/o3/rename_map.cc
rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh
rename : cpu/o3/rob.cc => src/cpu/o3/rob.cc
rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh
rename : cpu/o3/rob_impl.hh => src/cpu/o3/rob_impl.hh
rename : cpu/o3/sat_counter.cc => src/cpu/o3/sat_counter.cc
rename : cpu/o3/sat_counter.hh => src/cpu/o3/sat_counter.hh
rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc
rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/op_class.cc => src/cpu/op_class.cc
rename : cpu/op_class.hh => src/cpu/op_class.hh
rename : cpu/ozone/cpu.cc => src/cpu/ozone/cpu.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/ea_list.cc => src/cpu/ozone/ea_list.cc
rename : cpu/ozone/ea_list.hh => src/cpu/ozone/ea_list.hh
rename : cpu/pc_event.cc => src/cpu/pc_event.cc
rename : cpu/pc_event.hh => src/cpu/pc_event.hh
rename : cpu/profile.cc => src/cpu/profile.cc
rename : cpu/profile.hh => src/cpu/profile.hh
rename : cpu/simple/atomic.cc => src/cpu/simple/atomic.cc
rename : cpu/simple/atomic.hh => src/cpu/simple/atomic.hh
rename : cpu/simple/base.cc => src/cpu/simple/base.cc
rename : cpu/simple/base.hh => src/cpu/simple/base.hh
rename : cpu/simple/timing.cc => src/cpu/simple/timing.cc
rename : cpu/simple/timing.hh => src/cpu/simple/timing.hh
rename : cpu/smt.hh => src/cpu/smt.hh
rename : cpu/static_inst.cc => src/cpu/static_inst.cc
rename : cpu/static_inst.hh => src/cpu/static_inst.hh
rename : cpu/trace/opt_cpu.cc => src/cpu/trace/opt_cpu.cc
rename : cpu/trace/opt_cpu.hh => src/cpu/trace/opt_cpu.hh
rename : cpu/trace/reader/ibm_reader.cc => src/cpu/trace/reader/ibm_reader.cc
rename : cpu/trace/reader/ibm_reader.hh => src/cpu/trace/reader/ibm_reader.hh
rename : cpu/trace/reader/itx_reader.cc => src/cpu/trace/reader/itx_reader.cc
rename : cpu/trace/reader/itx_reader.hh => src/cpu/trace/reader/itx_reader.hh
rename : cpu/trace/reader/m5_reader.cc => src/cpu/trace/reader/m5_reader.cc
rename : cpu/trace/reader/m5_reader.hh => src/cpu/trace/reader/m5_reader.hh
rename : cpu/trace/reader/mem_trace_reader.cc => src/cpu/trace/reader/mem_trace_reader.cc
rename : cpu/trace/reader/mem_trace_reader.hh => src/cpu/trace/reader/mem_trace_reader.hh
rename : cpu/trace/trace_cpu.cc => src/cpu/trace/trace_cpu.cc
rename : cpu/trace/trace_cpu.hh => src/cpu/trace/trace_cpu.hh
rename : dev/alpha_access.h => src/dev/alpha_access.h
rename : dev/alpha_console.cc => src/dev/alpha_console.cc
rename : dev/alpha_console.hh => src/dev/alpha_console.hh
rename : dev/baddev.cc => src/dev/baddev.cc
rename : dev/baddev.hh => src/dev/baddev.hh
rename : dev/disk_image.cc => src/dev/disk_image.cc
rename : dev/disk_image.hh => src/dev/disk_image.hh
rename : dev/etherbus.cc => src/dev/etherbus.cc
rename : dev/etherbus.hh => src/dev/etherbus.hh
rename : dev/etherdump.cc => src/dev/etherdump.cc
rename : dev/etherdump.hh => src/dev/etherdump.hh
rename : dev/etherint.cc => src/dev/etherint.cc
rename : dev/etherint.hh => src/dev/etherint.hh
rename : dev/etherlink.cc => src/dev/etherlink.cc
rename : dev/etherlink.hh => src/dev/etherlink.hh
rename : dev/etherpkt.cc => src/dev/etherpkt.cc
rename : dev/etherpkt.hh => src/dev/etherpkt.hh
rename : dev/ethertap.cc => src/dev/ethertap.cc
rename : dev/ethertap.hh => src/dev/ethertap.hh
rename : dev/ide_atareg.h => src/dev/ide_atareg.h
rename : dev/ide_ctrl.cc => src/dev/ide_ctrl.cc
rename : dev/ide_ctrl.hh => src/dev/ide_ctrl.hh
rename : dev/ide_disk.cc => src/dev/ide_disk.cc
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : dev/ide_wdcreg.h => src/dev/ide_wdcreg.h
rename : dev/io_device.cc => src/dev/io_device.cc
rename : dev/io_device.hh => src/dev/io_device.hh
rename : dev/isa_fake.cc => src/dev/isa_fake.cc
rename : dev/isa_fake.hh => src/dev/isa_fake.hh
rename : dev/ns_gige.cc => src/dev/ns_gige.cc
rename : dev/ns_gige.hh => src/dev/ns_gige.hh
rename : dev/ns_gige_reg.h => src/dev/ns_gige_reg.h
rename : dev/pciconfigall.cc => src/dev/pciconfigall.cc
rename : dev/pciconfigall.hh => src/dev/pciconfigall.hh
rename : dev/pcidev.cc => src/dev/pcidev.cc
rename : dev/pcidev.hh => src/dev/pcidev.hh
rename : dev/pcireg.h => src/dev/pcireg.h
rename : dev/pitreg.h => src/dev/pitreg.h
rename : dev/pktfifo.cc => src/dev/pktfifo.cc
rename : dev/pktfifo.hh => src/dev/pktfifo.hh
rename : dev/platform.cc => src/dev/platform.cc
rename : dev/platform.hh => src/dev/platform.hh
rename : dev/rtcreg.h => src/dev/rtcreg.h
rename : dev/simconsole.cc => src/dev/simconsole.cc
rename : dev/simconsole.hh => src/dev/simconsole.hh
rename : dev/simple_disk.cc => src/dev/simple_disk.cc
rename : dev/simple_disk.hh => src/dev/simple_disk.hh
rename : dev/sinic.cc => src/dev/sinic.cc
rename : dev/sinic.hh => src/dev/sinic.hh
rename : dev/sinicreg.hh => src/dev/sinicreg.hh
rename : dev/tsunami.cc => src/dev/tsunami.cc
rename : dev/tsunami.hh => src/dev/tsunami.hh
rename : dev/tsunami_cchip.cc => src/dev/tsunami_cchip.cc
rename : dev/tsunami_cchip.hh => src/dev/tsunami_cchip.hh
rename : dev/tsunami_io.cc => src/dev/tsunami_io.cc
rename : dev/tsunami_io.hh => src/dev/tsunami_io.hh
rename : dev/tsunami_pchip.cc => src/dev/tsunami_pchip.cc
rename : dev/tsunami_pchip.hh => src/dev/tsunami_pchip.hh
rename : dev/tsunamireg.h => src/dev/tsunamireg.h
rename : dev/uart.cc => src/dev/uart.cc
rename : dev/uart.hh => src/dev/uart.hh
rename : dev/uart8250.cc => src/dev/uart8250.cc
rename : dev/uart8250.hh => src/dev/uart8250.hh
rename : kern/kernel_stats.cc => src/kern/kernel_stats.cc
rename : kern/kernel_stats.hh => src/kern/kernel_stats.hh
rename : kern/linux/events.cc => src/kern/linux/events.cc
rename : kern/linux/events.hh => src/kern/linux/events.hh
rename : kern/linux/linux.hh => src/kern/linux/linux.hh
rename : kern/linux/linux_syscalls.cc => src/kern/linux/linux_syscalls.cc
rename : kern/linux/linux_syscalls.hh => src/kern/linux/linux_syscalls.hh
rename : kern/linux/printk.cc => src/kern/linux/printk.cc
rename : kern/linux/printk.hh => src/kern/linux/printk.hh
rename : kern/linux/sched.hh => src/kern/linux/sched.hh
rename : kern/solaris/solaris.hh => src/kern/solaris/solaris.hh
rename : kern/system_events.cc => src/kern/system_events.cc
rename : kern/system_events.hh => src/kern/system_events.hh
rename : kern/tru64/dump_mbuf.cc => src/kern/tru64/dump_mbuf.cc
rename : kern/tru64/dump_mbuf.hh => src/kern/tru64/dump_mbuf.hh
rename : kern/tru64/mbuf.hh => src/kern/tru64/mbuf.hh
rename : kern/tru64/printf.cc => src/kern/tru64/printf.cc
rename : kern/tru64/printf.hh => src/kern/tru64/printf.hh
rename : kern/tru64/tru64.hh => src/kern/tru64/tru64.hh
rename : kern/tru64/tru64_events.cc => src/kern/tru64/tru64_events.cc
rename : kern/tru64/tru64_events.hh => src/kern/tru64/tru64_events.hh
rename : kern/tru64/tru64_syscalls.cc => src/kern/tru64/tru64_syscalls.cc
rename : kern/tru64/tru64_syscalls.hh => src/kern/tru64/tru64_syscalls.hh
rename : mem/bridge.cc => src/mem/bridge.cc
rename : mem/bridge.hh => src/mem/bridge.hh
rename : mem/bus.cc => src/mem/bus.cc
rename : mem/bus.hh => src/mem/bus.hh
rename : mem/cache/prefetch/tagged_prefetcher_impl.hh => src/mem/cache/prefetch/tagged_prefetcher_impl.hh
rename : mem/config/prefetch.hh => src/mem/config/prefetch.hh
rename : mem/mem_object.cc => src/mem/mem_object.cc
rename : mem/mem_object.hh => src/mem/mem_object.hh
rename : mem/packet.cc => src/mem/packet.cc
rename : mem/packet.hh => src/mem/packet.hh
rename : mem/page_table.cc => src/mem/page_table.cc
rename : mem/page_table.hh => src/mem/page_table.hh
rename : mem/physical.cc => src/mem/physical.cc
rename : mem/physical.hh => src/mem/physical.hh
rename : mem/port.cc => src/mem/port.cc
rename : mem/port.hh => src/mem/port.hh
rename : mem/request.hh => src/mem/request.hh
rename : mem/translating_port.cc => src/mem/translating_port.cc
rename : mem/translating_port.hh => src/mem/translating_port.hh
rename : mem/vport.cc => src/mem/vport.cc
rename : mem/vport.hh => src/mem/vport.hh
rename : python/SConscript => src/python/SConscript
rename : python/m5/__init__.py => src/python/m5/__init__.py
rename : python/m5/config.py => src/python/m5/config.py
rename : python/m5/convert.py => src/python/m5/convert.py
rename : python/m5/multidict.py => src/python/m5/multidict.py
rename : python/m5/objects/AlphaConsole.py => src/python/m5/objects/AlphaConsole.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py
rename : python/m5/objects/AlphaTLB.py => src/python/m5/objects/AlphaTLB.py
rename : python/m5/objects/BadDevice.py => src/python/m5/objects/BadDevice.py
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/BaseCache.py => src/python/m5/objects/BaseCache.py
rename : python/m5/objects/Bridge.py => src/python/m5/objects/Bridge.py
rename : python/m5/objects/Bus.py => src/python/m5/objects/Bus.py
rename : python/m5/objects/CoherenceProtocol.py => src/python/m5/objects/CoherenceProtocol.py
rename : python/m5/objects/Device.py => src/python/m5/objects/Device.py
rename : python/m5/objects/DiskImage.py => src/python/m5/objects/DiskImage.py
rename : python/m5/objects/Ethernet.py => src/python/m5/objects/Ethernet.py
rename : python/m5/objects/Ide.py => src/python/m5/objects/Ide.py
rename : python/m5/objects/IntrControl.py => src/python/m5/objects/IntrControl.py
rename : python/m5/objects/MemObject.py => src/python/m5/objects/MemObject.py
rename : python/m5/objects/MemTest.py => src/python/m5/objects/MemTest.py
rename : python/m5/objects/Pci.py => src/python/m5/objects/Pci.py
rename : python/m5/objects/PhysicalMemory.py => src/python/m5/objects/PhysicalMemory.py
rename : python/m5/objects/Platform.py => src/python/m5/objects/Platform.py
rename : python/m5/objects/Process.py => src/python/m5/objects/Process.py
rename : python/m5/objects/Repl.py => src/python/m5/objects/Repl.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/SimConsole.py => src/python/m5/objects/SimConsole.py
rename : python/m5/objects/SimpleDisk.py => src/python/m5/objects/SimpleDisk.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : python/m5/objects/Tsunami.py => src/python/m5/objects/Tsunami.py
rename : python/m5/objects/Uart.py => src/python/m5/objects/Uart.py
rename : python/m5/smartdict.py => src/python/m5/smartdict.py
rename : sim/async.hh => src/sim/async.hh
rename : sim/builder.cc => src/sim/builder.cc
rename : sim/builder.hh => src/sim/builder.hh
rename : sim/byteswap.hh => src/sim/byteswap.hh
rename : sim/debug.cc => src/sim/debug.cc
rename : sim/debug.hh => src/sim/debug.hh
rename : sim/eventq.cc => src/sim/eventq.cc
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/faults.cc => src/sim/faults.cc
rename : sim/faults.hh => src/sim/faults.hh
rename : sim/host.hh => src/sim/host.hh
rename : sim/main.cc => src/sim/main.cc
rename : sim/param.cc => src/sim/param.cc
rename : sim/param.hh => src/sim/param.hh
rename : sim/process.cc => src/sim/process.cc
rename : sim/process.hh => src/sim/process.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/root.cc => src/sim/root.cc
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/serialize.hh => src/sim/serialize.hh
rename : sim/sim_events.cc => src/sim/sim_events.cc
rename : sim/sim_events.hh => src/sim/sim_events.hh
rename : sim/sim_exit.hh => src/sim/sim_exit.hh
rename : sim/sim_object.cc => src/sim/sim_object.cc
rename : sim/sim_object.hh => src/sim/sim_object.hh
rename : sim/startup.cc => src/sim/startup.cc
rename : sim/startup.hh => src/sim/startup.hh
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/stats.hh => src/sim/stats.hh
rename : sim/syscall_emul.cc => src/sim/syscall_emul.cc
rename : sim/syscall_emul.hh => src/sim/syscall_emul.hh
rename : sim/system.cc => src/sim/system.cc
rename : sim/system.hh => src/sim/system.hh
rename : sim/vptr.hh => src/sim/vptr.hh
rename : test/Makefile => src/unittest/Makefile
rename : test/bitvectest.cc => src/unittest/bitvectest.cc
rename : test/circletest.cc => src/unittest/circletest.cc
rename : test/cprintftest.cc => src/unittest/cprintftest.cc
rename : test/foo.ini => src/unittest/foo.ini
rename : test/genini.py => src/unittest/genini.py
rename : test/initest.cc => src/unittest/initest.cc
rename : test/initest.ini => src/unittest/initest.ini
rename : test/lru_test.cc => src/unittest/lru_test.cc
rename : test/nmtest.cc => src/unittest/nmtest.cc
rename : test/offtest.cc => src/unittest/offtest.cc
rename : test/paramtest.cc => src/unittest/paramtest.cc
rename : test/rangetest.cc => src/unittest/rangetest.cc
rename : test/sized_test.cc => src/unittest/sized_test.cc
rename : test/stattest.cc => src/unittest/stattest.cc
rename : test/strnumtest.cc => src/unittest/strnumtest.cc
rename : test/symtest.cc => src/unittest/symtest.cc
rename : test/tokentest.cc => src/unittest/tokentest.cc
rename : test/tracetest.cc => src/unittest/tracetest.cc
extra : convert_revision : cab6a5271ca1b368193cd948e5d3dcc47ab1bd48
Diffstat (limited to 'src/cpu')
105 files changed, 22965 insertions, 0 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript new file mode 100644 index 000000000..34fb6df78 --- /dev/null +++ b/src/cpu/SConscript @@ -0,0 +1,143 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import os.path + +# Import build environment variable from SConstruct. +Import('env') + +################################################################# +# +# Generate StaticInst execute() method signatures. +# +# There must be one signature for each CPU model compiled in. +# Since the set of compiled-in models is flexible, we generate a +# header containing the appropriate set of signatures on the fly. +# +################################################################# + +# CPU model-specific data is contained in cpu_models.py +# Convert to SCons File node to get path handling +models_db = File('cpu_models.py') +# slurp in contents of file +execfile(models_db.srcnode().abspath) + +# Template for execute() signature. +exec_sig_template = ''' +virtual Fault execute(%s *xc, Trace::InstRecord *traceData) const = 0; +virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const +{ panic("initiateAcc not defined!"); }; +virtual Fault completeAcc(Packet *pkt, %s *xc, + Trace::InstRecord *traceData) const +{ panic("completeAcc not defined!"); }; +''' + +# Generate header. +def gen_cpu_exec_signatures(target, source, env): + f = open(str(target[0]), 'w') + print >> f, ''' +#ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ +#define __CPU_STATIC_INST_EXEC_SIGS_HH__ +''' + for cpu in env['CPU_MODELS']: + xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] + print >> f, exec_sig_template % (xc_type, xc_type, xc_type) + print >> f, ''' +#endif // __CPU_STATIC_INST_EXEC_SIGS_HH__ +''' + +# Generate string that gets printed when header is rebuilt +def gen_sigs_string(target, source, env): + return "Generating static_inst_exec_sigs.hh: " \ + + ', '.join(env['CPU_MODELS']) + +# Add command to generate header to environment. +env.Command('static_inst_exec_sigs.hh', models_db, + Action(gen_cpu_exec_signatures, gen_sigs_string, + varlist = ['CPU_MODELS'])) + +################################################################# +# +# Include CPU-model-specific files based on set of models +# specified in CPU_MODELS build option. +# +################################################################# + +sources = [] + +need_simple_base = False +if 'AtomicSimpleCPU' in env['CPU_MODELS']: + need_simple_base = True + sources += Split('simple/atomic.cc') + +if 'TimingSimpleCPU' in env['CPU_MODELS']: + need_simple_base = True + sources += Split('simple/timing.cc') + +if need_simple_base: + sources += Split('simple/base.cc') + +if 'FastCPU' in env['CPU_MODELS']: + sources += Split('fast/cpu.cc') + +if 'AlphaFullCPU' in env['CPU_MODELS']: + sources += Split(''' + o3/2bit_local_pred.cc + o3/alpha_dyn_inst.cc + o3/alpha_cpu.cc + o3/alpha_cpu_builder.cc + o3/bpred_unit.cc + o3/btb.cc + o3/commit.cc + o3/decode.cc + o3/fetch.cc + o3/free_list.cc + o3/cpu.cc + o3/iew.cc + o3/inst_queue.cc + o3/ldstq.cc + o3/mem_dep_unit.cc + o3/ras.cc + o3/rename.cc + o3/rename_map.cc + o3/rob.cc + o3/sat_counter.cc + o3/store_set.cc + o3/tournament_pred.cc + ''') + +# FullCPU sources are included from m5/SConscript since they're not +# below this point in the file hierarchy. + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/base.cc b/src/cpu/base.cc new file mode 100644 index 000000000..9ce458c64 --- /dev/null +++ b/src/cpu/base.cc @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <iostream> +#include <string> +#include <sstream> + +#include "base/cprintf.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "base/output.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/profile.hh" +#include "cpu/sampler/sampler.hh" +#include "sim/param.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/system.hh" + +#include "base/trace.hh" + +#if FULL_SYSTEM +#include "kern/kernel_stats.hh" +#endif + +using namespace std; + +vector<BaseCPU *> BaseCPU::cpuList; + +// This variable reflects the max number of threads in any CPU. Be +// careful to only use it once all the CPUs that you care about have +// been initialized +int maxThreadsPerCPU = 1; + +#if FULL_SYSTEM +BaseCPU::BaseCPU(Params *p) + : SimObject(p->name), clock(p->clock), checkInterrupts(true), + params(p), number_of_threads(p->numberOfThreads), system(p->system) +#else +BaseCPU::BaseCPU(Params *p) + : SimObject(p->name), clock(p->clock), params(p), + number_of_threads(p->numberOfThreads), system(p->system) +#endif +{ + DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + + // add self to global list of CPUs + cpuList.push_back(this); + + DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + this); + + if (number_of_threads > maxThreadsPerCPU) + maxThreadsPerCPU = number_of_threads; + + // allocate per-thread instruction-based event queues + comInstEventQueue = new EventQueue *[number_of_threads]; + for (int i = 0; i < number_of_threads; ++i) + comInstEventQueue[i] = new EventQueue("instruction-based event queue"); + + // + // set up instruction-count-based termination events, if any + // + if (p->max_insts_any_thread != 0) + for (int i = 0; i < number_of_threads; ++i) + new SimExitEvent(comInstEventQueue[i], p->max_insts_any_thread, + "a thread reached the max instruction count"); + + if (p->max_insts_all_threads != 0) { + // allocate & initialize shared downcounter: each event will + // decrement this when triggered; simulation will terminate + // when counter reaches 0 + int *counter = new int; + *counter = number_of_threads; + for (int i = 0; i < number_of_threads; ++i) + new CountedExitEvent(comInstEventQueue[i], + "all threads reached the max instruction count", + p->max_insts_all_threads, *counter); + } + + // allocate per-thread load-based event queues + comLoadEventQueue = new EventQueue *[number_of_threads]; + for (int i = 0; i < number_of_threads; ++i) + comLoadEventQueue[i] = new EventQueue("load-based event queue"); + + // + // set up instruction-count-based termination events, if any + // + if (p->max_loads_any_thread != 0) + for (int i = 0; i < number_of_threads; ++i) + new SimExitEvent(comLoadEventQueue[i], p->max_loads_any_thread, + "a thread reached the max load count"); + + if (p->max_loads_all_threads != 0) { + // allocate & initialize shared downcounter: each event will + // decrement this when triggered; simulation will terminate + // when counter reaches 0 + int *counter = new int; + *counter = number_of_threads; + for (int i = 0; i < number_of_threads; ++i) + new CountedExitEvent(comLoadEventQueue[i], + "all threads reached the max load count", + p->max_loads_all_threads, *counter); + } + +#if FULL_SYSTEM + memset(interrupts, 0, sizeof(interrupts)); + intstatus = 0; +#endif + + functionTracingEnabled = false; + if (p->functionTrace) { + functionTraceStream = simout.find(csprintf("ftrace.%s", name())); + currentFunctionStart = currentFunctionEnd = 0; + functionEntryTick = p->functionTraceStart; + + if (p->functionTraceStart == 0) { + functionTracingEnabled = true; + } else { + Event *e = + new EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace>(this, + true); + e->schedule(p->functionTraceStart); + } + } +#if FULL_SYSTEM + profileEvent = NULL; + if (params->profile) + profileEvent = new ProfileEvent(this, params->profile); + + kernelStats = new Kernel::Statistics(system); +#endif + +} + +BaseCPU::Params::Params() +{ +#if FULL_SYSTEM + profile = false; +#endif +} + +void +BaseCPU::enableFunctionTrace() +{ + functionTracingEnabled = true; +} + +BaseCPU::~BaseCPU() +{ +#if FULL_SYSTEM + if (kernelStats) + delete kernelStats; +#endif +} + +void +BaseCPU::init() +{ + if (!params->deferRegistration) + registerExecContexts(); +} + +void +BaseCPU::startup() +{ +#if FULL_SYSTEM + if (!params->deferRegistration && profileEvent) + profileEvent->schedule(curTick); +#endif +} + + +void +BaseCPU::regStats() +{ + using namespace Stats; + + numCycles + .name(name() + ".numCycles") + .desc("number of cpu cycles simulated") + ; + + int size = execContexts.size(); + if (size > 1) { + for (int i = 0; i < size; ++i) { + stringstream namestr; + ccprintf(namestr, "%s.ctx%d", name(), i); + execContexts[i]->regStats(namestr.str()); + } + } else if (size == 1) + execContexts[0]->regStats(name()); + +#if FULL_SYSTEM + if (kernelStats) + kernelStats->regStats(name() + ".kern"); +#endif +} + + +void +BaseCPU::registerExecContexts() +{ + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + +#if FULL_SYSTEM + int id = params->cpu_id; + if (id != -1) + id += i; + + xc->setCpuId(system->registerExecContext(xc, id)); +#else + xc->setCpuId(xc->getProcessPtr()->registerExecContext(xc)); +#endif + } +} + + +void +BaseCPU::switchOut(Sampler *sampler) +{ + panic("This CPU doesn't support sampling!"); +} + +void +BaseCPU::takeOverFrom(BaseCPU *oldCPU) +{ + assert(execContexts.size() == oldCPU->execContexts.size()); + + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *newXC = execContexts[i]; + ExecContext *oldXC = oldCPU->execContexts[i]; + + newXC->takeOverFrom(oldXC); + assert(newXC->readCpuId() == oldXC->readCpuId()); +#if FULL_SYSTEM + system->replaceExecContext(newXC, newXC->readCpuId()); +#else + assert(newXC->getProcessPtr() == oldXC->getProcessPtr()); + newXC->getProcessPtr()->replaceExecContext(newXC, newXC->readCpuId()); +#endif + } + +#if FULL_SYSTEM + for (int i = 0; i < TheISA::NumInterruptLevels; ++i) + interrupts[i] = oldCPU->interrupts[i]; + intstatus = oldCPU->intstatus; + + for (int i = 0; i < execContexts.size(); ++i) + execContexts[i]->profileClear(); + + if (profileEvent) + profileEvent->schedule(curTick); +#endif +} + + +#if FULL_SYSTEM +BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, int _interval) + : Event(&mainEventQueue), cpu(_cpu), interval(_interval) +{ } + +void +BaseCPU::ProfileEvent::process() +{ + for (int i = 0, size = cpu->execContexts.size(); i < size; ++i) { + ExecContext *xc = cpu->execContexts[i]; + xc->profileSample(); + } + + schedule(curTick + interval); +} + +void +BaseCPU::post_interrupt(int int_num, int index) +{ + DPRINTF(Interrupt, "Interrupt %d:%d posted\n", int_num, index); + + if (int_num < 0 || int_num >= TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + if (index < 0 || index >= sizeof(uint64_t) * 8) + panic("int_num out of bounds\n"); + + checkInterrupts = true; + interrupts[int_num] |= 1 << index; + intstatus |= (ULL(1) << int_num); +} + +void +BaseCPU::clear_interrupt(int int_num, int index) +{ + DPRINTF(Interrupt, "Interrupt %d:%d cleared\n", int_num, index); + + if (int_num < 0 || int_num >= TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + if (index < 0 || index >= sizeof(uint64_t) * 8) + panic("int_num out of bounds\n"); + + interrupts[int_num] &= ~(1 << index); + if (interrupts[int_num] == 0) + intstatus &= ~(ULL(1) << int_num); +} + +void +BaseCPU::clear_interrupts() +{ + DPRINTF(Interrupt, "Interrupts all cleared\n"); + + memset(interrupts, 0, sizeof(interrupts)); + intstatus = 0; +} + + +void +BaseCPU::serialize(std::ostream &os) +{ + SERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels); + SERIALIZE_SCALAR(intstatus); + +#if FULL_SYSTEM + if (kernelStats) + kernelStats->serialize(os); +#endif + +} + +void +BaseCPU::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels); + UNSERIALIZE_SCALAR(intstatus); + +#if FULL_SYSTEM + if (kernelStats) + kernelStats->unserialize(cp, section); +#endif +} + +#endif // FULL_SYSTEM + +void +BaseCPU::traceFunctionsInternal(Addr pc) +{ + if (!debugSymbolTable) + return; + + // if pc enters different function, print new function symbol and + // update saved range. Otherwise do nothing. + if (pc < currentFunctionStart || pc >= currentFunctionEnd) { + string sym_str; + bool found = debugSymbolTable->findNearestSymbol(pc, sym_str, + currentFunctionStart, + currentFunctionEnd); + + if (!found) { + // no symbol found: use addr as label + sym_str = csprintf("0x%x", pc); + currentFunctionStart = pc; + currentFunctionEnd = pc + 1; + } + + ccprintf(*functionTraceStream, " (%d)\n%d: %s", + curTick - functionEntryTick, curTick, sym_str); + functionEntryTick = curTick; + } +} + + +DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU) diff --git a/src/cpu/base.hh b/src/cpu/base.hh new file mode 100644 index 000000000..79700c117 --- /dev/null +++ b/src/cpu/base.hh @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_HH__ +#define __CPU_BASE_HH__ + +#include <vector> + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/sampler/sampler.hh" +#include "sim/eventq.hh" +#include "sim/sim_object.hh" +#include "arch/isa_traits.hh" + +class System; +namespace Kernel { class Statistics; } +class BranchPred; +class ExecContext; + +class BaseCPU : public SimObject +{ + protected: + // CPU's clock period in terms of the number of ticks of curTime. + Tick clock; + + public: + inline Tick frequency() const { return Clock::Frequency / clock; } + inline Tick cycles(int numCycles) const { return clock * numCycles; } + inline Tick curCycle() const { return curTick / clock; } + +#if FULL_SYSTEM + protected: + uint64_t interrupts[TheISA::NumInterruptLevels]; + uint64_t intstatus; + + public: + virtual void post_interrupt(int int_num, int index); + virtual void clear_interrupt(int int_num, int index); + virtual void clear_interrupts(); + bool checkInterrupts; + + bool check_interrupt(int int_num) const { + if (int_num > TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + return interrupts[int_num] != 0; + } + + bool check_interrupts() const { return intstatus != 0; } + uint64_t intr_status() const { return intstatus; } + + class ProfileEvent : public Event + { + private: + BaseCPU *cpu; + int interval; + + public: + ProfileEvent(BaseCPU *cpu, int interval); + void process(); + }; + ProfileEvent *profileEvent; +#endif + + protected: + std::vector<ExecContext *> execContexts; + + public: + + /// Notify the CPU that the indicated context is now active. The + /// delay parameter indicates the number of ticks to wait before + /// executing (typically 0 or 1). + virtual void activateContext(int thread_num, int delay) {} + + /// Notify the CPU that the indicated context is now suspended. + virtual void suspendContext(int thread_num) {} + + /// Notify the CPU that the indicated context is now deallocated. + virtual void deallocateContext(int thread_num) {} + + /// Notify the CPU that the indicated context is now halted. + virtual void haltContext(int thread_num) {} + + public: + struct Params + { + std::string name; + int numberOfThreads; + bool deferRegistration; + Counter max_insts_any_thread; + Counter max_insts_all_threads; + Counter max_loads_any_thread; + Counter max_loads_all_threads; + Tick clock; + bool functionTrace; + Tick functionTraceStart; + System *system; +#if FULL_SYSTEM + int cpu_id; + Tick profile; +#endif + + Params(); + }; + + const Params *params; + + BaseCPU(Params *params); + virtual ~BaseCPU(); + + virtual void init(); + virtual void startup(); + virtual void regStats(); + + virtual void activateWhenReady(int tid) {}; + + void registerExecContexts(); + + /// Prepare for another CPU to take over execution. When it is + /// is ready (drained pipe) it signals the sampler. + virtual void switchOut(Sampler *); + + /// Take over execution from the given CPU. Used for warm-up and + /// sampling. + virtual void takeOverFrom(BaseCPU *); + + /** + * Number of threads we're actually simulating (<= SMT_MAX_THREADS). + * This is a constant for the duration of the simulation. + */ + int number_of_threads; + + /** + * Vector of per-thread instruction-based event queues. Used for + * scheduling events based on number of instructions committed by + * a particular thread. + */ + EventQueue **comInstEventQueue; + + /** + * Vector of per-thread load-based event queues. Used for + * scheduling events based on number of loads committed by + *a particular thread. + */ + EventQueue **comLoadEventQueue; + + System *system; + +#if FULL_SYSTEM + /** + * Serialize this object to the given output stream. + * @param os The stream to serialize to. + */ + virtual void serialize(std::ostream &os); + + /** + * Reconstruct the state of this object from a checkpoint. + * @param cp The checkpoint use. + * @param section The section name of this object + */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#endif + + /** + * Return pointer to CPU's branch predictor (NULL if none). + * @return Branch predictor pointer. + */ + virtual BranchPred *getBranchPred() { return NULL; }; + + virtual Counter totalInstructions() const { return 0; } + + // Function tracing + private: + bool functionTracingEnabled; + std::ostream *functionTraceStream; + Addr currentFunctionStart; + Addr currentFunctionEnd; + Tick functionEntryTick; + void enableFunctionTrace(); + void traceFunctionsInternal(Addr pc); + + protected: + void traceFunctions(Addr pc) + { + if (functionTracingEnabled) + traceFunctionsInternal(pc); + } + + private: + static std::vector<BaseCPU *> cpuList; //!< Static global cpu list + + public: + static int numSimulatedCPUs() { return cpuList.size(); } + static Counter numSimulatedInstructions() + { + Counter total = 0; + + int size = cpuList.size(); + for (int i = 0; i < size; ++i) + total += cpuList[i]->totalInstructions(); + + return total; + } + + public: + // Number of CPU cycles simulated + Stats::Scalar<> numCycles; + +#if FULL_SYSTEM + Kernel::Statistics *kernelStats; +#endif +}; + +#endif // __CPU_BASE_HH__ diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc new file mode 100644 index 000000000..bf7c35cad --- /dev/null +++ b/src/cpu/base_dyn_inst.cc @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_DYN_INST_CC__ +#define __CPU_BASE_DYN_INST_CC__ + +#include <iostream> +#include <string> +#include <sstream> + +#include "base/cprintf.hh" +#include "base/trace.hh" + +#include "arch/faults.hh" +#include "cpu/exetrace.hh" +#include "mem/mem_req.hh" + +#include "cpu/base_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_cpu.hh" + +using namespace std; +using namespace TheISA; + +#define NOHASH +#ifndef NOHASH + +#include "base/hashmap.hh" + +unsigned int MyHashFunc(const BaseDynInst *addr) +{ + unsigned a = (unsigned)addr; + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; +} + +typedef m5::hash_map<const BaseDynInst *, const BaseDynInst *, MyHashFunc> my_hash_t; +my_hash_t thishash; +#endif + +template <class Impl> +BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC, + Addr pred_PC, InstSeqNum seq_num, + FullCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu), cpuXC(cpu->cpuXCBase()) +{ + seqNum = seq_num; + + PC = inst_PC; + nextPC = PC + sizeof(MachInst); + predPC = pred_PC; + + initVars(); +} + +template <class Impl> +BaseDynInst<Impl>::BaseDynInst(StaticInstPtr &_staticInst) + : staticInst(_staticInst), traceData(NULL) +{ + initVars(); +} + +template <class Impl> +void +BaseDynInst<Impl>::initVars() +{ + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; + + readyRegs = 0; + + completed = false; + canIssue = false; + issued = false; + executed = false; + canCommit = false; + squashed = false; + squashedInIQ = false; + eaCalcDone = false; + + blockingInst = false; + recoverInst = false; + + // Eventually make this a parameter. + threadNumber = 0; + + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be unimplemented opcode. + fault = new UnimplementedOpcodeFault; + + ++instcount; + + DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", + instcount); +} + +template <class Impl> +BaseDynInst<Impl>::~BaseDynInst() +{ + --instcount; + DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n", + instcount); +} + +template <class Impl> +void +BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags) +{ + // This is the "functional" implementation of prefetch. Not much + // happens here since prefetches don't affect the architectural + // state. + + // Generate a MemReq so we can translate the effective address. + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), 1, flags); + req->asid = asid; + + // Prefetches never cause faults. + fault = NoFault; + + // note this is a local, not BaseDynInst::fault + Fault trans_fault = cpuXC->translateDataReadReq(req); + + if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) { + // It's a valid address to cacheable space. Record key MemReq + // parameters so we can generate another one just like it for + // the timing access without calling translate() again (which + // might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // Bogus address (invalid or uncacheable space). Mark it by + // setting the eff_addr to InvalidAddr. + effAddr = physEffAddr = MemReq::inval_addr; + } + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#if !FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (traceData) { + traceData->setAddr(addr); + } +} + +template <class Impl> +void +BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags) +{ + // Need to create a MemReq here so we can do a translation. This + // will casue a TLB miss trap if necessary... not sure whether + // that's the best thing to do or not. We don't really need the + // MemReq otherwise, since wh64 has no functional effect. + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), size, flags); + req->asid = asid; + + fault = cpuXC->translateDataWriteReq(req); + + if (fault == NoFault && !(req->flags & UNCACHEABLE)) { + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // ignore faults & accesses to uncacheable space... treat as no-op + effAddr = physEffAddr = MemReq::inval_addr; + } + + storeSize = size; + storeData = 0; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template <class Impl> +Fault +BaseDynInst<Impl>::copySrcTranslate(Addr src) +{ + MemReqPtr req = new MemReq(src, cpuXC->getProxy(), 64); + req->asid = asid; + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(req); + + if (fault == NoFault) { + cpuXC->copySrcAddr = src; + cpuXC->copySrcPhysAddr = req->paddr; + } else { + cpuXC->copySrcAddr = 0; + cpuXC->copySrcPhysAddr = 0; + } + return fault; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template <class Impl> +Fault +BaseDynInst<Impl>::copy(Addr dest) +{ + uint8_t data[64]; + FunctionalMemory *mem = cpuXC->mem; + assert(cpuXC->copySrcPhysAddr || cpuXC->misspeculating()); + MemReqPtr req = new MemReq(dest, cpuXC->getProxy(), 64); + req->asid = asid; + + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(req); + + if (fault == NoFault) { + Addr dest_addr = req->paddr; + // Need to read straight from memory since we have more than 8 bytes. + req->paddr = cpuXC->copySrcPhysAddr; + mem->read(req, data); + req->paddr = dest_addr; + mem->write(req, data); + } + return fault; +} + +template <class Impl> +void +BaseDynInst<Impl>::dump() +{ + cprintf("T%d : %#08d `", threadNumber, PC); + cout << staticInst->disassemble(PC); + cprintf("'\n"); +} + +template <class Impl> +void +BaseDynInst<Impl>::dump(std::string &outstring) +{ + std::ostringstream s; + s << "T" << threadNumber << " : 0x" << PC << " " + << staticInst->disassemble(PC); + + outstring = s.str(); +} + + +#if 0 +template <class Impl> +Fault +BaseDynInst<Impl>::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) +{ + Fault fault; + + // check alignments, even speculative this test should always pass + if ((nbytes & nbytes - 1) != 0 || (addr & nbytes - 1) != 0) { + for (int i = 0; i < nbytes; i++) + ((char *) p)[i] = 0; + + // I added the following because according to the comment above, + // we should never get here. The comment lies +#if 0 + panic("unaligned access. Cycle = %n", curTick); +#endif + return NoFault; + } + + MemReqPtr req = new MemReq(addr, thread, nbytes); + switch(cmd) { + case Read: + fault = spec_mem->read(req, (uint8_t *)p); + break; + + case Write: + fault = spec_mem->write(req, (uint8_t *)p); + if (fault != NoFault) + break; + + specMemWrite = true; + storeSize = nbytes; + switch(nbytes) { + case sizeof(uint8_t): + *(uint8_t)&storeData = (uint8_t *)p; + break; + case sizeof(uint16_t): + *(uint16_t)&storeData = (uint16_t *)p; + break; + case sizeof(uint32_t): + *(uint32_t)&storeData = (uint32_t *)p; + break; + case sizeof(uint64_t): + *(uint64_t)&storeData = (uint64_t *)p; + break; + } + break; + + default: + fault = genMachineCheckFault(); + break; + } + + trace_mem(fault, cmd, addr, p, nbytes); + + return fault; +} + +#endif + +template <class Impl> +bool +BaseDynInst<Impl>::eaSrcsReady() +{ + // For now I am assuming that src registers 1..n-1 are the ones that the + // EA calc depends on. (i.e. src reg 0 is the source of the data to be + // stored) + + for (int i = 1; i < numSrcRegs(); ++i) + { + if (!_readySrcRegIdx[i]) + return false; + } + + return true; +} + +// Forward declaration +template class BaseDynInst<AlphaSimpleImpl>; + +template <> +int +BaseDynInst<AlphaSimpleImpl>::instcount = 0; + +#endif // __CPU_BASE_DYN_INST_CC__ diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh new file mode 100644 index 000000000..3a7852f79 --- /dev/null +++ b/src/cpu/base_dyn_inst.hh @@ -0,0 +1,530 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_DYN_INST_HH__ +#define __CPU_BASE_DYN_INST_HH__ + +#include <string> +#include <vector> + +#include "base/fast_alloc.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/exetrace.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/comm.hh" +#include "cpu/static_inst.hh" +#include "encumbered/cpu/full/bpred_update.hh" +#include "encumbered/cpu/full/op_class.hh" +#include "encumbered/cpu/full/spec_memory.hh" +#include "encumbered/cpu/full/spec_state.hh" +#include "encumbered/mem/functional/main.hh" + +/** + * @file + * Defines a dynamic instruction context. + */ + +// Forward declaration. +class StaticInstPtr; + +template <class Impl> +class BaseDynInst : public FastAlloc, public RefCounted +{ + public: + // Typedef for the CPU. + typedef typename Impl::FullCPU FullCPU; + + /// Binary machine instruction type. + typedef TheISA::MachInst MachInst; + /// Logical register index type. + typedef TheISA::RegIndex RegIndex; + /// Integer register index type. + typedef TheISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + /** The static inst used by this dyn inst. */ + StaticInstPtr staticInst; + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// + Trace::InstRecord *traceData; + + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res); + + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); + Fault copySrcTranslate(Addr src); + Fault copy(Addr dest); + + /** @todo: Consider making this private. */ + public: + /** Is this instruction valid. */ + bool valid; + + /** The sequence number of the instruction. */ + InstSeqNum seqNum; + + /** How many source registers are ready. */ + unsigned readyRegs; + + /** Is the instruction completed. */ + bool completed; + + /** Can this instruction issue. */ + bool canIssue; + + /** Has this instruction issued. */ + bool issued; + + /** Has this instruction executed (or made it through execute) yet. */ + bool executed; + + /** Can this instruction commit. */ + bool canCommit; + + /** Is this instruction squashed. */ + bool squashed; + + /** Is this instruction squashed in the instruction queue. */ + bool squashedInIQ; + + /** Is this a recover instruction. */ + bool recoverInst; + + /** Is this a thread blocking instruction. */ + bool blockingInst; /* this inst has called thread_block() */ + + /** Is this a thread syncrhonization instruction. */ + bool threadsyncWait; + + /** The thread this instruction is from. */ + short threadNumber; + + /** data address space ID, for loads & stores. */ + short asid; + + /** Pointer to the FullCPU object. */ + FullCPU *cpu; + + /** Pointer to the exec context. Will not exist in the final version. */ + CPUExecContext *cpuXC; + + /** The kind of fault this instruction has generated. */ + Fault fault; + + /** The effective virtual address (lds & stores only). */ + Addr effAddr; + + /** The effective physical address. */ + Addr physEffAddr; + + /** Effective virtual address for a copy source. */ + Addr copySrcEffAddr; + + /** Effective physical address for a copy source. */ + Addr copySrcPhysEffAddr; + + /** The memory request flags (from translation). */ + unsigned memReqFlags; + + /** The size of the data to be stored. */ + int storeSize; + + /** The data to be stored. */ + IntReg storeData; + + union Result { + uint64_t integer; + float fp; + double dbl; + }; + + /** The result of the instruction; assumes for now that there's only one + * destination register. + */ + Result instResult; + + /** PC of this instruction. */ + Addr PC; + + /** Next non-speculative PC. It is not filled in at fetch, but rather + * once the target of the branch is truly known (either decode or + * execute). + */ + Addr nextPC; + + /** Predicted next PC. */ + Addr predPC; + + /** Count of total number of dynamic instructions. */ + static int instcount; + + /** Whether or not the source register is ready. Not sure this should be + * here vs. the derived class. + */ + bool _readySrcRegIdx[MaxInstSrcRegs]; + + public: + /** BaseDynInst constructor given a binary instruction. */ + BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + BaseDynInst(StaticInstPtr &_staticInst); + + /** BaseDynInst destructor. */ + ~BaseDynInst(); + + private: + /** Function to initialize variables in the constructors. */ + void initVars(); + + public: + void + trace_mem(Fault fault, // last fault + MemCmd cmd, // last command + Addr addr, // virtual address of access + void *p, // memory accessed + int nbytes); // access size + + /** Dumps out contents of this BaseDynInst. */ + void dump(); + + /** Dumps out contents of this BaseDynInst into given string. */ + void dump(std::string &outstring); + + /** Returns the fault type. */ + Fault getFault() { return fault; } + + /** Checks whether or not this instruction has had its branch target + * calculated yet. For now it is not utilized and is hacked to be + * always false. + */ + bool doneTargCalc() { return false; } + + /** Returns the next PC. This could be the speculative next PC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextPC() { return nextPC; } + + /** Set the predicted target of this current instruction. */ + void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } + + /** Returns the predicted target of the branch. */ + Addr readPredTarg() { return predPC; } + + /** Returns whether the instruction was predicted taken or not. */ + bool predTaken() { + return( predPC != (PC + sizeof(MachInst) ) ); + } + + /** Returns whether the instruction mispredicted. */ + bool mispredicted() { return (predPC != nextPC); } + + // + // Instruction types. Forward checks to StaticInst object. + // + bool isNop() const { return staticInst->isNop(); } + bool isMemRef() const { return staticInst->isMemRef(); } + bool isLoad() const { return staticInst->isLoad(); } + bool isStore() const { return staticInst->isStore(); } + bool isInstPrefetch() const { return staticInst->isInstPrefetch(); } + bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } + bool isCopy() const { return staticInst->isCopy(); } + bool isInteger() const { return staticInst->isInteger(); } + bool isFloating() const { return staticInst->isFloating(); } + bool isControl() const { return staticInst->isControl(); } + bool isCall() const { return staticInst->isCall(); } + bool isReturn() const { return staticInst->isReturn(); } + bool isDirectCtrl() const { return staticInst->isDirectCtrl(); } + bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } + bool isCondCtrl() const { return staticInst->isCondCtrl(); } + bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isThreadSync() const { return staticInst->isThreadSync(); } + bool isSerializing() const { return staticInst->isSerializing(); } + bool isMemBarrier() const { return staticInst->isMemBarrier(); } + bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } + bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } + + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + + /** Number of source registers. */ + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + + /** Number of destination registers. */ + int8_t numDestRegs() const { return staticInst->numDestRegs(); } + + // the following are used to track physical register usage + // for machines with separate int & FP reg files + int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } + int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + + /** Returns the logical register index of the i'th destination register. */ + RegIndex destRegIdx(int i) const + { + return staticInst->destRegIdx(i); + } + + /** Returns the logical register index of the i'th source register. */ + RegIndex srcRegIdx(int i) const + { + return staticInst->srcRegIdx(i); + } + + /** Returns the result of an integer instruction. */ + uint64_t readIntResult() { return instResult.integer; } + + /** Returns the result of a floating point instruction. */ + float readFloatResult() { return instResult.fp; } + + /** Returns the result of a floating point (double) instruction. */ + double readDoubleResult() { return instResult.dbl; } + + //Push to .cc file. + /** Records that one of the source registers is ready. */ + void markSrcRegReady() + { + ++readyRegs; + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + /** Marks a specific register as ready. + * @todo: Move this to .cc file. + */ + void markSrcRegReady(RegIndex src_idx) + { + ++readyRegs; + + _readySrcRegIdx[src_idx] = 1; + + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + /** Returns if a source register is ready. */ + bool isReadySrcRegIdx(int idx) const + { + return this->_readySrcRegIdx[idx]; + } + + /** Sets this instruction as completed. */ + void setCompleted() { completed = true; } + + /** Returns whethe or not this instruction is completed. */ + bool isCompleted() const { return completed; } + + /** Sets this instruction as ready to issue. */ + void setCanIssue() { canIssue = true; } + + /** Returns whether or not this instruction is ready to issue. */ + bool readyToIssue() const { return canIssue; } + + /** Sets this instruction as issued from the IQ. */ + void setIssued() { issued = true; } + + /** Returns whether or not this instruction has issued. */ + bool isIssued() const { return issued; } + + /** Sets this instruction as executed. */ + void setExecuted() { executed = true; } + + /** Returns whether or not this instruction has executed. */ + bool isExecuted() const { return executed; } + + /** Sets this instruction as ready to commit. */ + void setCanCommit() { canCommit = true; } + + /** Clears this instruction as being ready to commit. */ + void clearCanCommit() { canCommit = false; } + + /** Returns whether or not this instruction is ready to commit. */ + bool readyToCommit() const { return canCommit; } + + /** Sets this instruction as squashed. */ + void setSquashed() { squashed = true; } + + /** Returns whether or not this instruction is squashed. */ + bool isSquashed() const { return squashed; } + + /** Sets this instruction as squashed in the IQ. */ + void setSquashedInIQ() { squashedInIQ = true; } + + /** Returns whether or not this instruction is squashed in the IQ. */ + bool isSquashedInIQ() const { return squashedInIQ; } + + /** Read the PC of this instruction. */ + const Addr readPC() const { return PC; } + + /** Set the next PC of this instruction (its actual target). */ + void setNextPC(uint64_t val) { nextPC = val; } + + /** Returns the exec context. + * @todo: Remove this once the ExecContext is no longer used. + */ + ExecContext *xcBase() { return cpuXC->getProxy(); } + + private: + /** Instruction effective address. + * @todo: Consider if this is necessary or not. + */ + Addr instEffAddr; + /** Whether or not the effective address calculation is completed. + * @todo: Consider if this is necessary or not. + */ + bool eaCalcDone; + + public: + /** Sets the effective address. */ + void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + + /** Returns the effective address. */ + const Addr &getEA() const { return instEffAddr; } + + /** Returns whether or not the eff. addr. calculation has been completed. */ + bool doneEACalc() { return eaCalcDone; } + + /** Returns whether or not the eff. addr. source registers are ready. */ + bool eaSrcsReady(); + + public: + /** Load queue index. */ + int16_t lqIdx; + + /** Store queue index. */ + int16_t sqIdx; +}; + +template<class Impl> +template<class T> +inline Fault +BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags) +{ + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags); + req->asid = asid; + + fault = cpu->translateDataReadReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + // Do I ever really need this? -KTL 3/05 + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#if !FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == NoFault) { + fault = cpu->read(req, data, lqIdx); + } else { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + data = (T)-1; + } + + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + return fault; +} + +template<class Impl> +template<class T> +inline Fault +BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags); + + req->asid = asid; + + fault = cpu->translateDataWriteReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#if !FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == NoFault) { + fault = cpu->write(req, data, sqIdx); + } + + if (res) { + // always return some result to keep misspeculated paths + // (which will ignore faults) deterministic + *res = (fault == NoFault) ? req->result : 0; + } + + return fault; +} + +#endif // __CPU_BASE_DYN_INST_HH__ diff --git a/src/cpu/cpu_exec_context.cc b/src/cpu/cpu_exec_context.cc new file mode 100644 index 000000000..ec1e94561 --- /dev/null +++ b/src/cpu/cpu_exec_context.cc @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2001-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string> + +#include "arch/isa_traits.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" + +#if FULL_SYSTEM +#include "base/callback.hh" +#include "base/cprintf.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "cpu/profile.hh" +#include "kern/kernel_stats.hh" +#include "sim/serialize.hh" +#include "sim/sim_exit.hh" +#include "arch/stacktrace.hh" +#else +#include "sim/process.hh" +#include "sim/system.hh" +#include "mem/translating_port.hh" +#endif + +using namespace std; + +// constructor +#if FULL_SYSTEM +CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_sys, + AlphaITB *_itb, AlphaDTB *_dtb) + : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num), + cpu_id(-1), lastActivate(0), lastSuspend(0), system(_sys), itb(_itb), + dtb(_dtb), profile(NULL), quiesceEvent(this), func_exe_inst(0), + storeCondFailures(0) + +{ + proxy = new ProxyExecContext<CPUExecContext>(this); + + regs.clear(); + + if (cpu->params->profile) { + profile = new FunctionProfile(system->kernelSymtab); + Callback *cb = + new MakeCallback<CPUExecContext, + &CPUExecContext::dumpFuncProfile>(this); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. + static ProfileNode dummyNode; + profileNode = &dummyNode; + profilePC = 3; + + Port *mem_port; + physPort = new FunctionalPort(); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(physPort); + physPort->setPeer(mem_port); + + virtPort = new VirtualPort(); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(virtPort); + virtPort->setPeer(mem_port); +} +#else +CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, + Process *_process, int _asid, MemObject* memobj) + : _status(ExecContext::Unallocated), + cpu(_cpu), thread_num(_thread_num), cpu_id(-1), lastActivate(0), + lastSuspend(0), process(_process), asid(_asid), + func_exe_inst(0), storeCondFailures(0) +{ + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + port = new TranslatingPort(process->pTable, false); + mem_port = memobj->getPort("functional"); + mem_port->setPeer(port); + port->setPeer(mem_port); + + regs.clear(); + proxy = new ProxyExecContext<CPUExecContext>(this); +} + +CPUExecContext::CPUExecContext(RegFile *regFile) + : cpu(NULL), thread_num(-1), process(NULL), asid(-1), + func_exe_inst(0), storeCondFailures(0) +{ + regs = *regFile; + proxy = new ProxyExecContext<CPUExecContext>(this); +} + +#endif + +CPUExecContext::~CPUExecContext() +{ + delete proxy; +} + +#if FULL_SYSTEM +void +CPUExecContext::dumpFuncProfile() +{ + std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name())); + profile->dump(proxy, *os); +} + +CPUExecContext::EndQuiesceEvent::EndQuiesceEvent(CPUExecContext *_cpuXC) + : Event(&mainEventQueue), cpuXC(_cpuXC) +{ +} + +void +CPUExecContext::EndQuiesceEvent::process() +{ + cpuXC->activate(); +} + +const char* +CPUExecContext::EndQuiesceEvent::description() +{ + return "End Quiesce Event."; +} + +void +CPUExecContext::profileClear() +{ + if (profile) + profile->clear(); +} + +void +CPUExecContext::profileSample() +{ + if (profile) + profile->sample(profileNode, profilePC); +} + +#endif + +void +CPUExecContext::takeOverFrom(ExecContext *oldContext) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(system == oldContext->getSystemPtr()); +#else + assert(process == oldContext->getProcessPtr()); +#endif + + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + cpu_id = oldContext->readCpuId(); +#if !FULL_SYSTEM + func_exe_inst = oldContext->readFuncExeInst(); +#endif + + storeCondFailures = 0; + + oldContext->setStatus(ExecContext::Unallocated); +} + +void +CPUExecContext::serialize(ostream &os) +{ + SERIALIZE_ENUM(_status); + regs.serialize(os); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(func_exe_inst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; + if (quiesceEvent.scheduled()) + quiesceEndTick = quiesceEvent.when(); + SERIALIZE_SCALAR(quiesceEndTick); + +#endif +} + + +void +CPUExecContext::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_ENUM(_status); + regs.unserialize(cp, section); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(func_exe_inst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) + quiesceEvent.schedule(quiesceEndTick); +#endif +} + + +void +CPUExecContext::activate(int delay) +{ + if (status() == ExecContext::Active) + return; + + lastActivate = curTick; + + _status = ExecContext::Active; + cpu->activateContext(thread_num, delay); +} + +void +CPUExecContext::suspend() +{ + if (status() == ExecContext::Suspended) + return; + + lastActivate = curTick; + lastSuspend = curTick; +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ExecContext::Active); + return; + } +#endif +*/ + _status = ExecContext::Suspended; + cpu->suspendContext(thread_num); +} + +void +CPUExecContext::deallocate() +{ + if (status() == ExecContext::Unallocated) + return; + + _status = ExecContext::Unallocated; + cpu->deallocateContext(thread_num); +} + +void +CPUExecContext::halt() +{ + if (status() == ExecContext::Halted) + return; + + _status = ExecContext::Halted; + cpu->haltContext(thread_num); +} + + +void +CPUExecContext::regStats(const string &name) +{ +} + +void +CPUExecContext::copyArchRegs(ExecContext *xc) +{ + TheISA::copyRegs(xc, proxy); +} + +#if FULL_SYSTEM +VirtualPort* +CPUExecContext::getVirtPort(ExecContext *xc) +{ + if (!xc) + return virtPort; + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort(xc); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +void +CPUExecContext::delVirtPort(VirtualPort *vp) +{ +// assert(!vp->nullExecContext()); + delete vp->getPeer(); + delete vp; +} + + +#endif + diff --git a/src/cpu/cpu_exec_context.hh b/src/cpu/cpu_exec_context.hh new file mode 100644 index 000000000..2c06a7b3b --- /dev/null +++ b/src/cpu/cpu_exec_context.hh @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2001-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_CPU_EXEC_CONTEXT_HH__ +#define __CPU_CPU_EXEC_CONTEXT_HH__ + +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/exec_context.hh" +#include "mem/physical.hh" +#include "mem/request.hh" +#include "sim/byteswap.hh" +#include "sim/eventq.hh" +#include "sim/host.hh" +#include "sim/serialize.hh" + +class BaseCPU; + +#if FULL_SYSTEM + +#include "sim/system.hh" +#include "arch/tlb.hh" + +class FunctionProfile; +class ProfileNode; +class FunctionalPort; +class PhysicalPort; + + +#else // !FULL_SYSTEM + +#include "sim/process.hh" +#include "mem/page_table.hh" +class TranslatingPort; + + +#endif // FULL_SYSTEM + +// +// The CPUExecContext object represents a functional context for +// instruction execution. It incorporates everything required for +// architecture-level functional simulation of a single thread. +// + +class CPUExecContext +{ + protected: + typedef TheISA::RegFile RegFile; + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + public: + typedef ExecContext::Status Status; + + private: + Status _status; + + public: + Status status() const { return _status; } + + void setStatus(Status newStatus) { _status = newStatus; } + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1); + + /// Set the status to Suspended. + void suspend(); + + /// Set the status to Unallocated. + void deallocate(); + + /// Set the status to Halted. + void halt(); + + protected: + RegFile regs; // correct-path register context + + public: + // pointer to CPU associated with this context + BaseCPU *cpu; + + ProxyExecContext<CPUExecContext> *proxy; + + // Current instruction + MachInst inst; + + // Index of hardware thread context on the CPU that this represents. + int thread_num; + + // ID of this context w.r.t. the System or Process object to which + // it belongs. For full-system mode, this is the system CPU ID. + int cpu_id; + + Tick lastActivate; + Tick lastSuspend; + + System *system; + + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + + /** A functional port outgoing only for functional accesses to physical + * addresses.*/ + FunctionalPort *physPort; + + /** A functional port, outgoing only, for functional accesse to virtual + * addresses. That doen't require execution context information */ + VirtualPort *virtPort; + + FunctionProfile *profile; + ProfileNode *profileNode; + Addr profilePC; + void dumpFuncProfile(); + + /** Event for timing out quiesce instruction */ + struct EndQuiesceEvent : public Event + { + /** A pointer to the execution context that is quiesced */ + CPUExecContext *cpuXC; + + EndQuiesceEvent(CPUExecContext *_cpuXC); + + /** Event process to occur at interrupt*/ + virtual void process(); + + /** Event description */ + virtual const char *description(); + }; + EndQuiesceEvent quiesceEvent; + + Event *getQuiesceEvent() { return &quiesceEvent; } + + Tick readLastActivate() { return lastActivate; } + + Tick readLastSuspend() { return lastSuspend; } + + void profileClear(); + + void profileSample(); + +#else + /// Port that syscalls can use to access memory (provides translation step). + TranslatingPort *port; + + Process *process; + + // Address space ID. Note that this is used for TIMING cache + // simulation only; all functional memory accesses should use + // one of the FunctionalMemory pointers above. + short asid; + +#endif + + /** + * Temporary storage to pass the source address from copy_load to + * copy_store. + * @todo Remove this temporary when we have a better way to do it. + */ + Addr copySrcAddr; + /** + * Temp storage for the physical source address of a copy. + * @todo Remove this temporary when we have a better way to do it. + */ + Addr copySrcPhysAddr; + + + /* + * number of executed instructions, for matching with syscall trace + * points in EIO files. + */ + Counter func_exe_inst; + + // + // Count failed store conditionals so we can warn of apparent + // application deadlock situations. + unsigned storeCondFailures; + + // constructor: initialize context from given process structure +#if FULL_SYSTEM + CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_system, + AlphaITB *_itb, AlphaDTB *_dtb); +#else + CPUExecContext(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, + MemObject *memobj); + // Constructor to use XC to pass reg file around. Not used for anything + // else. + CPUExecContext(RegFile *regFile); +#endif + virtual ~CPUExecContext(); + + virtual void takeOverFrom(ExecContext *oldContext); + + void regStats(const std::string &name); + + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + + BaseCPU *getCpuPtr() { return cpu; } + + ExecContext *getProxy() { return proxy; } + + int getThreadNum() { return thread_num; } + +#if FULL_SYSTEM + System *getSystemPtr() { return system; } + + AlphaITB *getITBPtr() { return itb; } + + AlphaDTB *getDTBPtr() { return dtb; } + + int getInstAsid() { return regs.instAsid(); } + int getDataAsid() { return regs.dataAsid(); } + + Fault translateInstReq(RequestPtr &req) + { + return itb->translate(req, proxy); + } + + Fault translateDataReadReq(RequestPtr &req) + { + return dtb->translate(req, proxy, false); + } + + Fault translateDataWriteReq(RequestPtr &req) + { + return dtb->translate(req, proxy, true); + } + + FunctionalPort *getPhysPort() { return physPort; } + + /** Return a virtual port. If no exec context is specified then a static + * port is returned. Otherwise a port is created and returned. It must be + * deleted by deleteVirtPort(). */ + VirtualPort *getVirtPort(ExecContext *xc); + + void delVirtPort(VirtualPort *vp); + +#else + TranslatingPort *getMemPort() { return port; } + + Process *getProcessPtr() { return process; } + + int getInstAsid() { return asid; } + int getDataAsid() { return asid; } + + Fault translateInstReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + + Fault translateDataReadReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + + Fault translateDataWriteReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + +#endif + +/* + template <class T> + Fault read(RequestPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = mem->prot_read(req->paddr, data, req->size); + data = LittleEndianGuest::gtoh(data); + return error; + } + + template <class T> + Fault write(RequestPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < system->execContexts.size(); i++){ + xc = system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + return mem->prot_write(req->paddr, (T)htog(data), req->size); + } +*/ + virtual bool misspeculating(); + + + MachInst getInst() { return inst; } + + void setInst(MachInst new_inst) + { + inst = new_inst; + } + + Fault instRead(RequestPtr &req) + { + panic("instRead not implemented"); + // return funcPhysMem->read(req, inst); + return NoFault; + } + + void setCpuId(int id) { cpu_id = id; } + + int readCpuId() { return cpu_id; } + + void copyArchRegs(ExecContext *xc); + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx) + { + return regs.readIntReg(reg_idx); + } + + FloatReg readFloatReg(int reg_idx, int width) + { + return regs.readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(int reg_idx) + { + return regs.readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { + return regs.readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(int reg_idx) + { + return regs.readFloatRegBits(reg_idx); + } + + void setIntReg(int reg_idx, uint64_t val) + { + regs.setIntReg(reg_idx, val); + } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { + regs.setFloatReg(reg_idx, val, width); + } + + void setFloatReg(int reg_idx, FloatReg val) + { + regs.setFloatReg(reg_idx, val); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { + regs.setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { + regs.setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() + { + return regs.readPC(); + } + + void setPC(uint64_t val) + { + regs.setPC(val); + } + + uint64_t readNextPC() + { + return regs.readNextPC(); + } + + void setNextPC(uint64_t val) + { + regs.setNextPC(val); + } + + uint64_t readNextNPC() + { + return regs.readNextNPC(); + } + + void setNextNPC(uint64_t val) + { + regs.setNextNPC(val); + } + + + MiscReg readMiscReg(int misc_reg) + { + return regs.readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return regs.readMiscRegWithEffect(misc_reg, fault, proxy); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return regs.setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return regs.setMiscRegWithEffect(misc_reg, val, proxy); + } + + unsigned readStCondFailures() { return storeCondFailures; } + + void setStCondFailures(unsigned sc_failures) + { storeCondFailures = sc_failures; } + + void clearArchRegs() { regs.clear(); } + +#if FULL_SYSTEM + int readIntrFlag() { return regs.intrflag; } + void setIntrFlag(int val) { regs.intrflag = val; } + Fault hwrei(); + bool inPalMode() { return AlphaISA::PcPAL(regs.readPC()); } + bool simPalCheck(int palFunc); +#endif + +#if !FULL_SYSTEM + TheISA::IntReg getSyscallArg(int i) + { + return regs.readIntReg(TheISA::ArgumentReg0 + i); + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, TheISA::IntReg val) + { + regs.setIntReg(TheISA::ArgumentReg0 + i, val); + } + + void setSyscallReturn(SyscallReturn return_value) + { + TheISA::setSyscallReturn(return_value, ®s); + } + + void syscall(int64_t callnum) + { + process->syscall(callnum, proxy); + } + + Counter readFuncExeInst() { return func_exe_inst; } + + void setFuncExeInst(Counter new_val) { func_exe_inst = new_val; } +#endif + + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + regs.changeContext(param, val); + } +}; + + +// for non-speculative execution context, spec_mode is always false +inline bool +CPUExecContext::misspeculating() +{ + return false; +} + +#endif // __CPU_CPU_EXEC_CONTEXT_HH__ diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py new file mode 100644 index 000000000..30cbabde1 --- /dev/null +++ b/src/cpu/cpu_models.py @@ -0,0 +1,74 @@ +# Copyright (c) 2003-2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +################ +# CpuModel class +# +# The CpuModel class encapsulates everything the ISA parser needs to +# know about a particular CPU model. + +class CpuModel: + # Dict of available CPU model objects. Accessible as CpuModel.dict. + dict = {} + + # Constructor. Automatically adds models to CpuModel.dict. + def __init__(self, name, filename, includes, strings): + self.name = name + self.filename = filename # filename for output exec code + self.includes = includes # include files needed in exec file + # The 'strings' dict holds all the per-CPU symbols we can + # substitute into templates etc. + self.strings = strings + # Add self to dict + CpuModel.dict[name] = self + + +# +# Define CPU models. +# +# Parameters are: +# - name of model +# - filename for generated ISA execution file +# - includes needed for generated ISA execution file +# - substitution strings for ISA description templates +# + +CpuModel('AtomicSimpleCPU', 'atomic_simple_cpu_exec.cc', + '#include "cpu/simple/atomic.hh"', + { 'CPU_exec_context': 'AtomicSimpleCPU' }) +CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', + '#include "cpu/simple/timing.hh"', + { 'CPU_exec_context': 'TimingSimpleCPU' }) +CpuModel('FastCPU', 'fast_cpu_exec.cc', + '#include "cpu/fast/cpu.hh"', + { 'CPU_exec_context': 'FastCPU' }) +CpuModel('FullCPU', 'full_cpu_exec.cc', + '#include "encumbered/cpu/full/dyn_inst.hh"', + { 'CPU_exec_context': 'DynInst' }) +CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc', + '#include "cpu/o3/alpha_dyn_inst.hh"', + { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' }) + diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh new file mode 100644 index 000000000..1f26183ab --- /dev/null +++ b/src/cpu/exec_context.hh @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_EXEC_CONTEXT_HH__ +#define __CPU_EXEC_CONTEXT_HH__ + +#include "config/full_system.hh" +#include "mem/request.hh" +#include "sim/faults.hh" +#include "sim/host.hh" +#include "sim/serialize.hh" +#include "sim/byteswap.hh" + +// @todo: Figure out a more architecture independent way to obtain the ITB and +// DTB pointers. +class AlphaDTB; +class AlphaITB; +class BaseCPU; +class Event; +class TranslatingPort; +class FunctionalPort; +class VirtualPort; +class Process; +class System; + +class ExecContext +{ + protected: + typedef TheISA::RegFile RegFile; + typedef TheISA::MachInst MachInst; + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + public: + enum Status + { + /// Initialized but not running yet. All CPUs start in + /// this state, but most transition to Active on cycle 1. + /// In MP or SMT systems, non-primary contexts will stay + /// in this state until a thread is assigned to them. + Unallocated, + + /// Running. Instructions should be executed only when + /// the context is in this state. + Active, + + /// Temporarily inactive. Entered while waiting for + /// synchronization, etc. + Suspended, + + /// Permanently shut down. Entered when target executes + /// m5exit pseudo-instruction. When all contexts enter + /// this state, the simulation will terminate. + Halted + }; + + virtual ~ExecContext() { }; + + virtual BaseCPU *getCpuPtr() = 0; + + virtual void setCpuId(int id) = 0; + + virtual int readCpuId() = 0; + +#if FULL_SYSTEM + virtual System *getSystemPtr() = 0; + + virtual AlphaITB *getITBPtr() = 0; + + virtual AlphaDTB * getDTBPtr() = 0; + + virtual FunctionalPort *getPhysPort() = 0; + + virtual VirtualPort *getVirtPort(ExecContext *xc = NULL) = 0; + + virtual void delVirtPort(VirtualPort *vp) = 0; +#else + virtual TranslatingPort *getMemPort() = 0; + + virtual Process *getProcessPtr() = 0; +#endif + + virtual Status status() const = 0; + + virtual void setStatus(Status new_status) = 0; + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + virtual void activate(int delay = 1) = 0; + + /// Set the status to Suspended. + virtual void suspend() = 0; + + /// Set the status to Unallocated. + virtual void deallocate() = 0; + + /// Set the status to Halted. + virtual void halt() = 0; + +#if FULL_SYSTEM + virtual void dumpFuncProfile() = 0; +#endif + + virtual void takeOverFrom(ExecContext *old_context) = 0; + + virtual void regStats(const std::string &name) = 0; + + virtual void serialize(std::ostream &os) = 0; + virtual void unserialize(Checkpoint *cp, const std::string §ion) = 0; + +#if FULL_SYSTEM + virtual Event *getQuiesceEvent() = 0; + + // Not necessarily the best location for these... + // Having an extra function just to read these is obnoxious + virtual Tick readLastActivate() = 0; + virtual Tick readLastSuspend() = 0; + + virtual void profileClear() = 0; + virtual void profileSample() = 0; +#endif + + virtual int getThreadNum() = 0; + + virtual int getInstAsid() = 0; + virtual int getDataAsid() = 0; + + virtual Fault translateInstReq(RequestPtr &req) = 0; + + virtual Fault translateDataReadReq(RequestPtr &req) = 0; + + virtual Fault translateDataWriteReq(RequestPtr &req) = 0; + + // Also somewhat obnoxious. Really only used for the TLB fault. + // However, may be quite useful in SPARC. + virtual TheISA::MachInst getInst() = 0; + + virtual void copyArchRegs(ExecContext *xc) = 0; + + virtual void clearArchRegs() = 0; + + // + // New accessors for new decoder. + // + virtual uint64_t readIntReg(int reg_idx) = 0; + + virtual FloatReg readFloatReg(int reg_idx, int width) = 0; + + virtual FloatReg readFloatReg(int reg_idx) = 0; + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width) = 0; + + virtual FloatRegBits readFloatRegBits(int reg_idx) = 0; + + virtual void setIntReg(int reg_idx, uint64_t val) = 0; + + virtual void setFloatReg(int reg_idx, FloatReg val, int width) = 0; + + virtual void setFloatReg(int reg_idx, FloatReg val) = 0; + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0; + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width) = 0; + + virtual uint64_t readPC() = 0; + + virtual void setPC(uint64_t val) = 0; + + virtual uint64_t readNextPC() = 0; + + virtual void setNextPC(uint64_t val) = 0; + + virtual uint64_t readNextNPC() = 0; + + virtual void setNextNPC(uint64_t val) = 0; + + virtual MiscReg readMiscReg(int misc_reg) = 0; + + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) = 0; + + virtual Fault setMiscReg(int misc_reg, const MiscReg &val) = 0; + + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) = 0; + + // Also not necessarily the best location for these two. Hopefully will go + // away once we decide upon where st cond failures goes. + virtual unsigned readStCondFailures() = 0; + + virtual void setStCondFailures(unsigned sc_failures) = 0; + +#if FULL_SYSTEM + virtual int readIntrFlag() = 0; + virtual void setIntrFlag(int val) = 0; + virtual Fault hwrei() = 0; + virtual bool inPalMode() = 0; + virtual bool simPalCheck(int palFunc) = 0; +#endif + + // Only really makes sense for old CPU model. Still could be useful though. + virtual bool misspeculating() = 0; + +#if !FULL_SYSTEM + virtual IntReg getSyscallArg(int i) = 0; + + // used to shift args for indirect syscall + virtual void setSyscallArg(int i, IntReg val) = 0; + + virtual void setSyscallReturn(SyscallReturn return_value) = 0; + + virtual void syscall(int64_t callnum) = 0; + + // Same with st cond failures. + virtual Counter readFuncExeInst() = 0; + + virtual void setFuncExeInst(Counter new_val) = 0; +#endif + + virtual void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) = 0; +}; + +template <class XC> +class ProxyExecContext : public ExecContext +{ + public: + ProxyExecContext(XC *actual_xc) + { actualXC = actual_xc; } + + private: + XC *actualXC; + + public: + + BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); } + + void setCpuId(int id) { actualXC->setCpuId(id); } + + int readCpuId() { return actualXC->readCpuId(); } + +#if FULL_SYSTEM + System *getSystemPtr() { return actualXC->getSystemPtr(); } + + AlphaITB *getITBPtr() { return actualXC->getITBPtr(); } + + AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); } + + FunctionalPort *getPhysPort() { return actualXC->getPhysPort(); } + + VirtualPort *getVirtPort(ExecContext *xc = NULL) { return actualXC->getVirtPort(xc); } + + void delVirtPort(VirtualPort *vp) { return actualXC->delVirtPort(vp); } +#else + TranslatingPort *getMemPort() { return actualXC->getMemPort(); } + + Process *getProcessPtr() { return actualXC->getProcessPtr(); } +#endif + + Status status() const { return actualXC->status(); } + + void setStatus(Status new_status) { actualXC->setStatus(new_status); } + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1) { actualXC->activate(delay); } + + /// Set the status to Suspended. + void suspend() { actualXC->suspend(); } + + /// Set the status to Unallocated. + void deallocate() { actualXC->deallocate(); } + + /// Set the status to Halted. + void halt() { actualXC->halt(); } + +#if FULL_SYSTEM + void dumpFuncProfile() { actualXC->dumpFuncProfile(); } +#endif + + void takeOverFrom(ExecContext *oldContext) + { actualXC->takeOverFrom(oldContext); } + + void regStats(const std::string &name) { actualXC->regStats(name); } + + void serialize(std::ostream &os) { actualXC->serialize(os); } + void unserialize(Checkpoint *cp, const std::string §ion) + { actualXC->unserialize(cp, section); } + +#if FULL_SYSTEM + Event *getQuiesceEvent() { return actualXC->getQuiesceEvent(); } + + Tick readLastActivate() { return actualXC->readLastActivate(); } + Tick readLastSuspend() { return actualXC->readLastSuspend(); } + + void profileClear() { return actualXC->profileClear(); } + void profileSample() { return actualXC->profileSample(); } +#endif + + int getThreadNum() { return actualXC->getThreadNum(); } + + int getInstAsid() { return actualXC->getInstAsid(); } + int getDataAsid() { return actualXC->getDataAsid(); } + + Fault translateInstReq(RequestPtr &req) + { return actualXC->translateInstReq(req); } + + Fault translateDataReadReq(RequestPtr &req) + { return actualXC->translateDataReadReq(req); } + + Fault translateDataWriteReq(RequestPtr &req) + { return actualXC->translateDataWriteReq(req); } + + // @todo: Do I need this? + MachInst getInst() { return actualXC->getInst(); } + + // @todo: Do I need this? + void copyArchRegs(ExecContext *xc) { actualXC->copyArchRegs(xc); } + + void clearArchRegs() { actualXC->clearArchRegs(); } + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx) + { return actualXC->readIntReg(reg_idx); } + + FloatReg readFloatReg(int reg_idx, int width) + { return actualXC->readFloatReg(reg_idx, width); } + + FloatReg readFloatReg(int reg_idx) + { return actualXC->readFloatReg(reg_idx); } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { return actualXC->readFloatRegBits(reg_idx, width); } + + FloatRegBits readFloatRegBits(int reg_idx) + { return actualXC->readFloatRegBits(reg_idx); } + + void setIntReg(int reg_idx, uint64_t val) + { actualXC->setIntReg(reg_idx, val); } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { actualXC->setFloatReg(reg_idx, val, width); } + + void setFloatReg(int reg_idx, FloatReg val) + { actualXC->setFloatReg(reg_idx, val); } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { actualXC->setFloatRegBits(reg_idx, val, width); } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { actualXC->setFloatRegBits(reg_idx, val); } + + uint64_t readPC() { return actualXC->readPC(); } + + void setPC(uint64_t val) { actualXC->setPC(val); } + + uint64_t readNextPC() { return actualXC->readNextPC(); } + + void setNextPC(uint64_t val) { actualXC->setNextPC(val); } + + uint64_t readNextNPC() { return actualXC->readNextNPC(); } + + void setNextNPC(uint64_t val) { actualXC->setNextNPC(val); } + + MiscReg readMiscReg(int misc_reg) + { return actualXC->readMiscReg(misc_reg); } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return actualXC->readMiscRegWithEffect(misc_reg, fault); } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { return actualXC->setMiscReg(misc_reg, val); } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { return actualXC->setMiscRegWithEffect(misc_reg, val); } + + unsigned readStCondFailures() + { return actualXC->readStCondFailures(); } + + void setStCondFailures(unsigned sc_failures) + { actualXC->setStCondFailures(sc_failures); } + +#if FULL_SYSTEM + int readIntrFlag() { return actualXC->readIntrFlag(); } + + void setIntrFlag(int val) { actualXC->setIntrFlag(val); } + + Fault hwrei() { return actualXC->hwrei(); } + + bool inPalMode() { return actualXC->inPalMode(); } + + bool simPalCheck(int palFunc) { return actualXC->simPalCheck(palFunc); } +#endif + + // @todo: Fix this! + bool misspeculating() { return actualXC->misspeculating(); } + +#if !FULL_SYSTEM + IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { actualXC->setSyscallArg(i, val); } + + void setSyscallReturn(SyscallReturn return_value) + { actualXC->setSyscallReturn(return_value); } + + void syscall(int64_t callnum) { actualXC->syscall(callnum); } + + Counter readFuncExeInst() { return actualXC->readFuncExeInst(); } + + void setFuncExeInst(Counter new_val) + { return actualXC->setFuncExeInst(new_val); } +#endif + + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + actualXC->changeRegFileContext(param, val); + } +}; + +#endif diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc new file mode 100644 index 000000000..0ed3b43c4 --- /dev/null +++ b/src/cpu/exetrace.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fstream> +#include <iomanip> + +#include "base/loader/symtab.hh" +#include "cpu/base.hh" +#include "cpu/exetrace.hh" +#include "cpu/static_inst.hh" +#include "sim/param.hh" +#include "sim/system.hh" + +using namespace std; + + +//////////////////////////////////////////////////////////////////////// +// +// Methods for the InstRecord object +// + + +void +Trace::InstRecord::dump(ostream &outs) +{ + if (flags[INTEL_FORMAT]) { +#if FULL_SYSTEM + bool is_trace_system = (cpu->system->name() == trace_system); +#else + bool is_trace_system = true; +#endif + if (is_trace_system) { + ccprintf(outs, "%7d ) ", cycle); + outs << "0x" << hex << PC << ":\t"; + if (staticInst->isLoad()) { + outs << "<RD 0x" << hex << addr; + outs << ">"; + } else if (staticInst->isStore()) { + outs << "<WR 0x" << hex << addr; + outs << ">"; + } + outs << endl; + } + } else { + if (flags[PRINT_CYCLE]) + ccprintf(outs, "%7d: ", cycle); + + outs << cpu->name() << " "; + + if (flags[TRACE_MISSPEC]) + outs << (misspeculating ? "-" : "+") << " "; + + if (flags[PRINT_THREAD_NUM]) + outs << "T" << thread << " : "; + + + std::string sym_str; + Addr sym_addr; + if (debugSymbolTable + && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) { + if (PC != sym_addr) + sym_str += csprintf("+%d", PC - sym_addr); + outs << "@" << sym_str << " : "; + } + else { + outs << "0x" << hex << PC << " : "; + } + + // + // Print decoded instruction + // + +#if defined(__GNUC__) && (__GNUC__ < 3) + // There's a bug in gcc 2.x library that prevents setw() + // from working properly on strings + string mc(staticInst->disassemble(PC, debugSymbolTable)); + while (mc.length() < 26) + mc += " "; + outs << mc; +#else + outs << setw(26) << left << staticInst->disassemble(PC, debugSymbolTable); +#endif + + outs << " : "; + + if (flags[PRINT_OP_CLASS]) { + outs << opClassStrings[staticInst->opClass()] << " : "; + } + + if (flags[PRINT_RESULT_DATA] && data_status != DataInvalid) { + outs << " D="; +#if 0 + if (data_status == DataDouble) + ccprintf(outs, "%f", data.as_double); + else + ccprintf(outs, "%#018x", data.as_int); +#else + ccprintf(outs, "%#018x", data.as_int); +#endif + } + + if (flags[PRINT_EFF_ADDR] && addr_valid) + outs << " A=0x" << hex << addr; + + if (flags[PRINT_INT_REGS] && regs_valid) { + for (int i = 0; i < TheISA::NumIntRegs;) + for (int j = i + 1; i <= j; i++) + ccprintf(outs, "r%02d = %#018x%s", i, + iregs->regs.readReg(i), + ((i == j) ? "\n" : " ")); + outs << "\n"; + } + + if (flags[PRINT_FETCH_SEQ] && fetch_seq_valid) + outs << " FetchSeq=" << dec << fetch_seq; + + if (flags[PRINT_CP_SEQ] && cp_seq_valid) + outs << " CPSeq=" << dec << cp_seq; + + // + // End of line... + // + outs << endl; + } +} + + +vector<bool> Trace::InstRecord::flags(NUM_BITS); +string Trace::InstRecord::trace_system; + +//////////////////////////////////////////////////////////////////////// +// +// Parameter space for per-cycle execution address tracing options. +// Derive from ParamContext so we can override checkParams() function. +// +class ExecutionTraceParamContext : public ParamContext +{ + public: + ExecutionTraceParamContext(const string &_iniSection) + : ParamContext(_iniSection) + { + } + + void checkParams(); // defined at bottom of file +}; + +ExecutionTraceParamContext exeTraceParams("exetrace"); + +Param<bool> exe_trace_spec(&exeTraceParams, "speculative", + "capture speculative instructions", true); + +Param<bool> exe_trace_print_cycle(&exeTraceParams, "print_cycle", + "print cycle number", true); +Param<bool> exe_trace_print_opclass(&exeTraceParams, "print_opclass", + "print op class", true); +Param<bool> exe_trace_print_thread(&exeTraceParams, "print_thread", + "print thread number", true); +Param<bool> exe_trace_print_effaddr(&exeTraceParams, "print_effaddr", + "print effective address", true); +Param<bool> exe_trace_print_data(&exeTraceParams, "print_data", + "print result data", true); +Param<bool> exe_trace_print_iregs(&exeTraceParams, "print_iregs", + "print all integer regs", false); +Param<bool> exe_trace_print_fetchseq(&exeTraceParams, "print_fetchseq", + "print fetch sequence number", false); +Param<bool> exe_trace_print_cp_seq(&exeTraceParams, "print_cpseq", + "print correct-path sequence number", false); +Param<bool> exe_trace_intel_format(&exeTraceParams, "intel_format", + "print trace in intel compatible format", false); +Param<string> exe_trace_system(&exeTraceParams, "trace_system", + "print trace of which system (client or server)", + "client"); + + +// +// Helper function for ExecutionTraceParamContext::checkParams() just +// to get us into the InstRecord namespace +// +void +Trace::InstRecord::setParams() +{ + flags[TRACE_MISSPEC] = exe_trace_spec; + + flags[PRINT_CYCLE] = exe_trace_print_cycle; + flags[PRINT_OP_CLASS] = exe_trace_print_opclass; + flags[PRINT_THREAD_NUM] = exe_trace_print_thread; + flags[PRINT_RESULT_DATA] = exe_trace_print_effaddr; + flags[PRINT_EFF_ADDR] = exe_trace_print_data; + flags[PRINT_INT_REGS] = exe_trace_print_iregs; + flags[PRINT_FETCH_SEQ] = exe_trace_print_fetchseq; + flags[PRINT_CP_SEQ] = exe_trace_print_cp_seq; + flags[INTEL_FORMAT] = exe_trace_intel_format; + trace_system = exe_trace_system; +} + +void +ExecutionTraceParamContext::checkParams() +{ + Trace::InstRecord::setParams(); +} + diff --git a/src/cpu/exetrace.hh b/src/cpu/exetrace.hh new file mode 100644 index 000000000..a26cdc517 --- /dev/null +++ b/src/cpu/exetrace.hh @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EXETRACE_HH__ +#define __EXETRACE_HH__ + +#include <fstream> +#include <vector> + +#include "sim/host.hh" +#include "cpu/inst_seq.hh" // for InstSeqNum +#include "base/trace.hh" +#include "cpu/exec_context.hh" +#include "cpu/static_inst.hh" + +class BaseCPU; + + +namespace Trace { + +class InstRecord : public Record +{ + protected: + typedef TheISA::IntRegFile IntRegFile; + + // The following fields are initialized by the constructor and + // thus guaranteed to be valid. + BaseCPU *cpu; + // need to make this ref-counted so it doesn't go away before we + // dump the record + StaticInstPtr staticInst; + Addr PC; + bool misspeculating; + unsigned thread; + + // The remaining fields are only valid for particular instruction + // types (e.g, addresses for memory ops) or when particular + // options are enabled (e.g., tracing full register contents). + // Each data field has an associated valid flag to indicate + // whether the data field is valid. + Addr addr; + bool addr_valid; + + union { + uint64_t as_int; + double as_double; + } data; + enum { + DataInvalid = 0, + DataInt8 = 1, // set to equal number of bytes + DataInt16 = 2, + DataInt32 = 4, + DataInt64 = 8, + DataDouble = 3 + } data_status; + + InstSeqNum fetch_seq; + bool fetch_seq_valid; + + InstSeqNum cp_seq; + bool cp_seq_valid; + + struct iRegFile { + IntRegFile regs; + }; + iRegFile *iregs; + bool regs_valid; + + public: + InstRecord(Tick _cycle, BaseCPU *_cpu, + const StaticInstPtr &_staticInst, + Addr _pc, bool spec, int _thread) + : Record(_cycle), cpu(_cpu), staticInst(_staticInst), PC(_pc), + misspeculating(spec), thread(_thread) + { + data_status = DataInvalid; + addr_valid = false; + regs_valid = false; + + fetch_seq_valid = false; + cp_seq_valid = false; + } + + virtual ~InstRecord() { } + + virtual void dump(std::ostream &outs); + + void setAddr(Addr a) { addr = a; addr_valid = true; } + + void setData(uint64_t d) { data.as_int = d; data_status = DataInt64; } + void setData(uint32_t d) { data.as_int = d; data_status = DataInt32; } + void setData(uint16_t d) { data.as_int = d; data_status = DataInt16; } + void setData(uint8_t d) { data.as_int = d; data_status = DataInt8; } + + void setData(int64_t d) { setData((uint64_t)d); } + void setData(int32_t d) { setData((uint32_t)d); } + void setData(int16_t d) { setData((uint16_t)d); } + void setData(int8_t d) { setData((uint8_t)d); } + + void setData(double d) { data.as_double = d; data_status = DataDouble; } + + void setFetchSeq(InstSeqNum seq) + { fetch_seq = seq; fetch_seq_valid = true; } + + void setCPSeq(InstSeqNum seq) + { cp_seq = seq; cp_seq_valid = true; } + + void setRegs(const IntRegFile ®s); + + void finalize() { theLog.append(this); } + + enum InstExecFlagBits { + TRACE_MISSPEC = 0, + PRINT_CYCLE, + PRINT_OP_CLASS, + PRINT_THREAD_NUM, + PRINT_RESULT_DATA, + PRINT_EFF_ADDR, + PRINT_INT_REGS, + PRINT_FETCH_SEQ, + PRINT_CP_SEQ, + INTEL_FORMAT, + NUM_BITS + }; + + static std::vector<bool> flags; + static std::string trace_system; + + static void setParams(); + + static bool traceMisspec() { return flags[TRACE_MISSPEC]; } +}; + + +inline void +InstRecord::setRegs(const IntRegFile ®s) +{ + if (!iregs) + iregs = new iRegFile; + + memcpy(&iregs->regs, ®s, sizeof(IntRegFile)); + regs_valid = true; +} + +inline +InstRecord * +getInstRecord(Tick cycle, ExecContext *xc, BaseCPU *cpu, + const StaticInstPtr staticInst, + Addr pc, int thread = 0) +{ + if (DTRACE(InstExec) && + (InstRecord::traceMisspec() || !xc->misspeculating())) { + return new InstRecord(cycle, cpu, staticInst, pc, + xc->misspeculating(), thread); + } + + return NULL; +} + + +} + +#endif // __EXETRACE_HH__ diff --git a/src/cpu/inst_seq.hh b/src/cpu/inst_seq.hh new file mode 100644 index 000000000..8de047af7 --- /dev/null +++ b/src/cpu/inst_seq.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2001, 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __STD_TYPES_HH__ +#define __STD_TYPES_HH__ + +// inst sequence type, used to order instructions in the ready list, +// if this rolls over the ready list order temporarily will get messed +// up, but execution will continue and complete correctly +typedef uint64_t InstSeqNum; + +// inst tag type, used to tag an operation instance in the IQ +typedef unsigned int InstTag; + +#endif // __STD_TYPES_HH__ diff --git a/src/cpu/intr_control.cc b/src/cpu/intr_control.cc new file mode 100644 index 000000000..43e7f654c --- /dev/null +++ b/src/cpu/intr_control.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string> +#include <vector> + +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/intr_control.hh" +#include "sim/builder.hh" +#include "sim/sim_object.hh" + +using namespace std; + +IntrControl::IntrControl(const string &name, BaseCPU *c) + : SimObject(name), cpu(c) +{} + +/* @todo + *Fix the cpu sim object parameter to be a system pointer + *instead, to avoid some extra dereferencing + */ +void +IntrControl::post(int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[0]->getCpuPtr(); + temp->post_interrupt(int_num, index); +} + +void +IntrControl::post(int cpu_id, int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[cpu_id]->getCpuPtr(); + temp->post_interrupt(int_num, index); +} + +void +IntrControl::clear(int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[0]->getCpuPtr(); + temp->clear_interrupt(int_num, index); +} + +void +IntrControl::clear(int cpu_id, int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[cpu_id]->getCpuPtr(); + temp->clear_interrupt(int_num, index); +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(IntrControl) + + SimObjectParam<BaseCPU *> cpu; + +END_DECLARE_SIM_OBJECT_PARAMS(IntrControl) + +BEGIN_INIT_SIM_OBJECT_PARAMS(IntrControl) + + INIT_PARAM(cpu, "the cpu") + +END_INIT_SIM_OBJECT_PARAMS(IntrControl) + +CREATE_SIM_OBJECT(IntrControl) +{ + return new IntrControl(getInstanceName(), cpu); +} + +REGISTER_SIM_OBJECT("IntrControl", IntrControl) diff --git a/src/cpu/intr_control.hh b/src/cpu/intr_control.hh new file mode 100644 index 000000000..5ec4e14cb --- /dev/null +++ b/src/cpu/intr_control.hh @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INTR_CONTROL_HH__ +#define __INTR_CONTROL_HH__ + +#include <vector> +#include "base/misc.hh" +#include "cpu/base.hh" +#include "sim/sim_object.hh" +#include "sim/system.hh" + + +class IntrControl : public SimObject +{ + public: + BaseCPU *cpu; + IntrControl(const std::string &name, BaseCPU *c); + + void clear(int int_num, int index = 0); + void post(int int_num, int index = 0); + void clear(int cpu_id, int int_num, int index); + void post(int cpu_id, int int_num, int index); +}; + +#endif // __INTR_CONTROL_HH__ + + + + + + + diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc new file mode 100644 index 000000000..54def1012 --- /dev/null +++ b/src/cpu/memtest/memtest.cc @@ -0,0 +1,440 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded + +#include <iomanip> +#include <set> +#include <string> +#include <vector> + +#include "base/misc.hh" +#include "base/statistics.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/memtest/memtest.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +using namespace std; +using namespace TheISA; + +int TESTER_ALLOCATOR=0; + +MemTest::MemTest(const string &name, + MemInterface *_cache_interface, + FunctionalMemory *main_mem, + FunctionalMemory *check_mem, + unsigned _memorySize, + unsigned _percentReads, + unsigned _percentCopies, + unsigned _percentUncacheable, + unsigned _progressInterval, + unsigned _percentSourceUnaligned, + unsigned _percentDestUnaligned, + Addr _traceAddr, + Counter _max_loads) + : SimObject(name), + tickEvent(this), + cacheInterface(_cache_interface), + mainMem(main_mem), + checkMem(check_mem), + size(_memorySize), + percentReads(_percentReads), + percentCopies(_percentCopies), + percentUncacheable(_percentUncacheable), + progressInterval(_progressInterval), + nextProgressMessage(_progressInterval), + percentSourceUnaligned(_percentSourceUnaligned), + percentDestUnaligned(percentDestUnaligned), + maxLoads(_max_loads) +{ + vector<string> cmd; + cmd.push_back("/bin/ls"); + vector<string> null_vec; + cpuXC = new CPUExecContext(NULL, 0, mainMem, 0); + + blockSize = cacheInterface->getBlockSize(); + blockAddrMask = blockSize - 1; + traceBlockAddr = blockAddr(_traceAddr); + + //setup data storage with interesting values + uint8_t *data1 = new uint8_t[size]; + uint8_t *data2 = new uint8_t[size]; + uint8_t *data3 = new uint8_t[size]; + memset(data1, 1, size); + memset(data2, 2, size); + memset(data3, 3, size); + curTick = 0; + + baseAddr1 = 0x100000; + baseAddr2 = 0x400000; + uncacheAddr = 0x800000; + + // set up intial memory contents here + mainMem->prot_write(baseAddr1, data1, size); + checkMem->prot_write(baseAddr1, data1, size); + mainMem->prot_write(baseAddr2, data2, size); + checkMem->prot_write(baseAddr2, data2, size); + mainMem->prot_write(uncacheAddr, data3, size); + checkMem->prot_write(uncacheAddr, data3, size); + + delete [] data1; + delete [] data2; + delete [] data3; + + // set up counters + noResponseCycles = 0; + numReads = 0; + tickEvent.schedule(0); + + id = TESTER_ALLOCATOR++; +} + +static void +printData(ostream &os, uint8_t *data, int nbytes) +{ + os << hex << setfill('0'); + // assume little-endian: print bytes from highest address to lowest + for (uint8_t *dp = data + nbytes - 1; dp >= data; --dp) { + os << setw(2) << (unsigned)*dp; + } + os << dec; +} + +void +MemTest::completeRequest(MemReqPtr &req, uint8_t *data) +{ + //Remove the address from the list of outstanding + std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); + + switch (req->cmd) { + case Read: + if (memcmp(req->data, data, req->size) != 0) { + cerr << name() << ": on read of 0x" << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << "@ cycle " << dec << curTick + << ", cache returns 0x"; + printData(cerr, req->data, req->size); + cerr << ", expected 0x"; + printData(cerr, data, req->size); + cerr << endl; + fatal(""); + } + + numReads++; + numReadsStat++; + + if (numReads == nextProgressMessage) { + ccprintf(cerr, "%s: completed %d read accesses @%d\n", + name(), numReads, curTick); + nextProgressMessage += progressInterval; + } + + if (numReads >= maxLoads) + SimExit(curTick, "Maximum number of loads reached!"); + break; + + case Write: + numWritesStat++; + break; + + case Copy: + //Also remove dest from outstanding list + removeAddr = outstandingAddrs.find(req->dest); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); + numCopiesStat++; + break; + + default: + panic("invalid command"); + } + + if (blockAddr(req->paddr) == traceBlockAddr) { + cerr << name() << ": completed " + << (req->cmd.isWrite() ? "write" : "read") + << " access of " + << dec << req->size << " bytes at address 0x" + << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << ", value = 0x"; + printData(cerr, req->data, req->size); + cerr << " @ cycle " << dec << curTick; + + cerr << endl; + } + + noResponseCycles = 0; + delete [] data; +} + + +void +MemTest::regStats() +{ + using namespace Stats; + + + numReadsStat + .name(name() + ".num_reads") + .desc("number of read accesses completed") + ; + + numWritesStat + .name(name() + ".num_writes") + .desc("number of write accesses completed") + ; + + numCopiesStat + .name(name() + ".num_copies") + .desc("number of copy accesses completed") + ; +} + +void +MemTest::tick() +{ + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + cycles(1)); + + if (++noResponseCycles >= 500000) { + cerr << name() << ": deadlocked at cycle " << curTick << endl; + fatal(""); + } + + if (cacheInterface->isBlocked()) { + return; + } + + //make new request + unsigned cmd = random() % 100; + unsigned offset = random() % size; + unsigned base = random() % 2; + uint64_t data = random(); + unsigned access_size = random() % 4; + unsigned cacheable = random() % 100; + + //If we aren't doing copies, use id as offset, and do a false sharing + //mem tester + if (percentCopies == 0) { + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset &= ~63; //Not the low order bits + offset += id; + access_size = 0; + } + + MemReqPtr req = new MemReq(); + + if (cacheable < percentUncacheable) { + req->flags |= UNCACHEABLE; + req->paddr = uncacheAddr + offset; + } else { + req->paddr = ((base) ? baseAddr1 : baseAddr2) + offset; + } + // bool probe = (random() % 2 == 1) && !req->isUncacheable(); + bool probe = false; + + req->size = 1 << access_size; + req->data = new uint8_t[req->size]; + req->paddr &= ~(req->size - 1); + req->time = curTick; + req->xc = cpuXC->getProxy(); + + if (cmd < percentReads) { + // read + + //For now we only allow one outstanding request per addreess per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + + req->cmd = Read; + uint8_t *result = new uint8_t[8]; + checkMem->access(Read, req->paddr, result, req->size); + if (blockAddr(req->paddr) == traceBlockAddr) { + cerr << name() + << ": initiating read " + << ((probe) ? "probe of " : "access of ") + << dec << req->size << " bytes from addr 0x" + << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << " at cycle " + << dec << curTick << endl; + } + if (probe) { + cacheInterface->probeAndUpdate(req); + completeRequest(req, result); + } else { + req->completionEvent = new MemCompleteEvent(req, result, this); + cacheInterface->access(req); + } + } else if (cmd < (100 - percentCopies)){ + // write + + //For now we only allow one outstanding request per addreess per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + + req->cmd = Write; + memcpy(req->data, &data, req->size); + checkMem->access(Write, req->paddr, req->data, req->size); + if (blockAddr(req->paddr) == traceBlockAddr) { + cerr << name() << ": initiating write " + << ((probe)?"probe of ":"access of ") + << dec << req->size << " bytes (value = 0x"; + printData(cerr, req->data, req->size); + cerr << ") to addr 0x" + << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << " at cycle " + << dec << curTick << endl; + } + if (probe) { + cacheInterface->probeAndUpdate(req); + completeRequest(req, NULL); + } else { + req->completionEvent = new MemCompleteEvent(req, NULL, this); + cacheInterface->access(req); + } + } else { + // copy + unsigned source_align = random() % 100; + unsigned dest_align = random() % 100; + unsigned offset2 = random() % size; + + Addr source = ((base) ? baseAddr1 : baseAddr2) + offset; + Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2; + if (outstandingAddrs.find(source) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(source); + if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(dest); + + if (source_align >= percentSourceUnaligned) { + source = blockAddr(source); + } + if (dest_align >= percentDestUnaligned) { + dest = blockAddr(dest); + } + req->cmd = Copy; + req->flags &= ~UNCACHEABLE; + req->paddr = source; + req->dest = dest; + delete [] req->data; + req->data = new uint8_t[blockSize]; + req->size = blockSize; + if (source == traceBlockAddr || dest == traceBlockAddr) { + cerr << name() + << ": initiating copy of " + << dec << req->size << " bytes from addr 0x" + << hex << source + << " (0x" << hex << blockAddr(source) << ")" + << " to addr 0x" + << hex << dest + << " (0x" << hex << blockAddr(dest) << ")" + << " at cycle " + << dec << curTick << endl; + } + cacheInterface->access(req); + uint8_t result[blockSize]; + checkMem->access(Read, source, &result, blockSize); + checkMem->access(Write, dest, &result, blockSize); + } +} + + +void +MemCompleteEvent::process() +{ + tester->completeRequest(req, data); + delete this; +} + + +const char * +MemCompleteEvent::description() +{ + return "memory access completion"; +} + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) + + SimObjectParam<BaseCache *> cache; + SimObjectParam<FunctionalMemory *> main_mem; + SimObjectParam<FunctionalMemory *> check_mem; + Param<unsigned> memory_size; + Param<unsigned> percent_reads; + Param<unsigned> percent_copies; + Param<unsigned> percent_uncacheable; + Param<unsigned> progress_interval; + Param<unsigned> percent_source_unaligned; + Param<unsigned> percent_dest_unaligned; + Param<Addr> trace_addr; + Param<Counter> max_loads; + +END_DECLARE_SIM_OBJECT_PARAMS(MemTest) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) + + INIT_PARAM(cache, "L1 cache"), + INIT_PARAM(main_mem, "hierarchical memory"), + INIT_PARAM(check_mem, "check memory"), + INIT_PARAM(memory_size, "memory size"), + INIT_PARAM(percent_reads, "target read percentage"), + INIT_PARAM(percent_copies, "target copy percentage"), + INIT_PARAM(percent_uncacheable, "target uncacheable percentage"), + INIT_PARAM(progress_interval, "progress report interval (in accesses)"), + INIT_PARAM(percent_source_unaligned, + "percent of copy source address that are unaligned"), + INIT_PARAM(percent_dest_unaligned, + "percent of copy dest address that are unaligned"), + INIT_PARAM(trace_addr, "address to trace"), + INIT_PARAM(max_loads, "terminate when we have reached this load count") + +END_INIT_SIM_OBJECT_PARAMS(MemTest) + + +CREATE_SIM_OBJECT(MemTest) +{ + return new MemTest(getInstanceName(), cache->getInterface(), main_mem, + check_mem, memory_size, percent_reads, percent_copies, + percent_uncacheable, progress_interval, + percent_source_unaligned, percent_dest_unaligned, + trace_addr, max_loads); +} + +REGISTER_SIM_OBJECT("MemTest", MemTest) diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh new file mode 100644 index 000000000..cdb40a26a --- /dev/null +++ b/src/cpu/memtest/memtest.hh @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_MEMTEST_MEMTEST_HH__ +#define __CPU_MEMTEST_MEMTEST_HH__ + +#include <set> + +#include "base/statistics.hh" +#include "mem/functional/functional.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" +#include "sim/sim_exit.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +class ExecContext; +class MemTest : public SimObject +{ + public: + + MemTest(const std::string &name, + MemInterface *_cache_interface, + FunctionalMemory *main_mem, + FunctionalMemory *check_mem, + unsigned _memorySize, + unsigned _percentReads, + unsigned _percentCopies, + unsigned _percentUncacheable, + unsigned _progressInterval, + unsigned _percentSourceUnaligned, + unsigned _percentDestUnaligned, + Addr _traceAddr, + Counter _max_loads); + + // register statistics + virtual void regStats(); + + inline Tick cycles(int numCycles) const { return numCycles; } + + // main simulation loop (one cycle) + void tick(); + + protected: + class TickEvent : public Event + { + private: + MemTest *cpu; + public: + TickEvent(MemTest *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) {} + void process() {cpu->tick();} + virtual const char *description() { return "tick event"; } + }; + + TickEvent tickEvent; + + MemInterface *cacheInterface; + FunctionalMemory *mainMem; + FunctionalMemory *checkMem; + CPUExecContext *cpuXC; + + unsigned size; // size of testing memory region + + unsigned percentReads; // target percentage of read accesses + unsigned percentCopies; // target percentage of copy accesses + unsigned percentUncacheable; + + int id; + + std::set<unsigned> outstandingAddrs; + + unsigned blockSize; + + Addr blockAddrMask; + + Addr blockAddr(Addr addr) + { + return (addr & ~blockAddrMask); + } + + Addr traceBlockAddr; + + Addr baseAddr1; // fix this to option + Addr baseAddr2; // fix this to option + Addr uncacheAddr; + + unsigned progressInterval; // frequency of progress reports + Tick nextProgressMessage; // access # for next progress report + + unsigned percentSourceUnaligned; + unsigned percentDestUnaligned; + + Tick noResponseCycles; + + uint64_t numReads; + uint64_t maxLoads; + Stats::Scalar<> numReadsStat; + Stats::Scalar<> numWritesStat; + Stats::Scalar<> numCopiesStat; + + // called by MemCompleteEvent::process() + void completeRequest(MemReqPtr &req, uint8_t *data); + + friend class MemCompleteEvent; +}; + + +class MemCompleteEvent : public Event +{ + MemReqPtr req; + uint8_t *data; + MemTest *tester; + + public: + + MemCompleteEvent(MemReqPtr &_req, uint8_t *_data, MemTest *_tester) + : Event(&mainEventQueue), + req(_req), data(_data), tester(_tester) + { + } + + void process(); + + virtual const char *description(); +}; + +#endif // __CPU_MEMTEST_MEMTEST_HH__ + + + diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc new file mode 100644 index 000000000..d9744eec7 --- /dev/null +++ b/src/cpu/o3/2bit_local_pred.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "cpu/o3/2bit_local_pred.hh" + +DefaultBP::DefaultBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) + : localPredictorSize(_localPredictorSize), + localCtrBits(_localCtrBits), + instShiftAmt(_instShiftAmt) +{ + // Should do checks here to make sure sizes are correct (powers of 2). + + // Setup the index mask. + indexMask = localPredictorSize - 1; + + DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + + // Setup the array of counters for the local predictor. + localCtrs = new SatCounter[localPredictorSize]; + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(_localCtrBits); + + DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + localPredictorSize); + + DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + + DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + instShiftAmt); +} + +bool +DefaultBP::lookup(Addr &branch_addr) +{ + bool taken; + uint8_t local_prediction; + unsigned local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + assert(local_predictor_idx < localPredictorSize); + + local_prediction = localCtrs[local_predictor_idx].read(); + + DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + (int)local_prediction); + + taken = getPrediction(local_prediction); + +#if 0 + // Speculative update. + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +#endif + + return taken; +} + +void +DefaultBP::update(Addr &branch_addr, bool taken) +{ + unsigned local_predictor_idx; + + // Update the local predictor. + local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + assert(local_predictor_idx < localPredictorSize); + + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +} + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} diff --git a/src/cpu/o3/2bit_local_pred.hh b/src/cpu/o3/2bit_local_pred.hh new file mode 100644 index 000000000..97433e542 --- /dev/null +++ b/src/cpu/o3/2bit_local_pred.hh @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ +#define __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "cpu/o3/sat_counter.hh" + +class DefaultBP +{ + public: + /** + * Default branch predictor constructor. + */ + DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, bool taken); + + private: + + /** Returns the taken/not taken prediction given the value of the + * counter. + */ + inline bool getPrediction(uint8_t &count); + + /** Calculates the local index based on the PC. */ + inline unsigned getLocalIndex(Addr &PC); + + /** Array of counters that make up the local predictor. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Number of bits to shift the PC when calculating index. */ + unsigned instShiftAmt; + + /** Mask to get index bits. */ + unsigned indexMask; +}; + +#endif // __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha_cpu.cc new file mode 100644 index 000000000..7bc90dae6 --- /dev/null +++ b/src/cpu/o3/alpha_cpu.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_cpu_impl.hh" +#include "cpu/o3/alpha_dyn_inst.hh" + +// Force instantiation of AlphaFullCPU for all the implemntations that are +// needed. Consider merging this and alpha_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (alpha_impl.cc?). +template class AlphaFullCPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh new file mode 100644 index 000000000..8e1e0f42a --- /dev/null +++ b/src/cpu/o3/alpha_cpu.hh @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Find all the stuff in ExecContext and ev5 that needs to be +// specifically designed for this CPU. + +#ifndef __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ +#define __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ + +#include "cpu/o3/cpu.hh" +#include "arch/isa_traits.hh" +#include "sim/byteswap.hh" + +template <class Impl> +class AlphaFullCPU : public FullO3CPU<Impl> +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef typename Impl::Params Params; + + public: + AlphaFullCPU(Params ¶ms); + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif + + public: + void regStats(); + +#if FULL_SYSTEM + //Note that the interrupt stuff from the base CPU might be somewhat + //ISA specific (ie NumInterruptLevels). These functions might not + //be needed in FullCPU though. +// void post_interrupt(int int_num, int index); +// void clear_interrupt(int int_num, int index); +// void clear_interrupts(); + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + // Later on may want to remove this misc stuff from the regfile and + // have it handled at this level. Might prove to be an issue when + // trying to rename source/destination registers... + MiscReg readMiscReg(int misc_reg) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return 0; + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return NoFault; + } + + // Most of the full system code and syscall emulation is not yet + // implemented. These functions do show what the final interface will + // look like. +#if FULL_SYSTEM + int readIntrFlag(); + void setIntrFlag(int val); + Fault hwrei(); + bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); } + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + void trap(Fault fault); + bool simPalCheck(int palFunc); + + void processInterrupts(); +#endif + + +#if !FULL_SYSTEM + // Need to change these into regfile calls that directly set a certain + // register. Actually, these functions should handle most of this + // functionality by themselves; should look up the rename and then + // set the register. + IntReg getSyscallArg(int i) + { + return this->cpuXC->readIntReg(AlphaISA::ArgumentReg0 + i); + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { + this->cpuXC->setIntReg(AlphaISA::ArgumentReg0 + i, val); + } + + void setSyscallReturn(int64_t return_value) + { + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + const int RegA3 = 19; // only place this is used + if (return_value >= 0) { + // no error + this->cpuXC->setIntReg(RegA3, 0); + this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, return_value); + } else { + // got an error, return details + this->cpuXC->setIntReg(RegA3, (IntReg) -1); + this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, -return_value); + } + } + + void syscall(short thread_num); + void squashStages(); + +#endif + + void copyToXC(); + void copyFromXC(); + + public: +#if FULL_SYSTEM + bool palShadowEnabled; + + // Not sure this is used anywhere. + void intr_post(RegFile *regs, Fault fault, Addr pc); + // Actually used within exec files. Implement properly. + void swapPALShadow(bool use_shadow); + // Called by CPU constructor. Can implement as I please. + void initCPU(RegFile *regs); + // Called by initCPU. Implement as I please. + void initIPRs(RegFile *regs); + + void halt() { panic("Halt not implemented!\n"); } +#endif + + + template <class T> + Fault read(MemReqPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = this->mem->read(req, data); + data = gtoh(data); + return error; + } + + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + template <class T> + Fault write(MemReqPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < this->system->execContexts.size(); i++){ + xc = this->system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + + return this->mem->write(req, (T)htog(data)); + } + + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + +}; + +#endif // __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc new file mode 100644 index 000000000..6025b8ef2 --- /dev/null +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "cpu/base.hh" +#include "cpu/exetrace.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "mem/base_mem.hh" +#include "mem/cache/base_cache.hh" +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "mem/functional/memory_control.hh" +#include "mem/functional/physical.hh" +#include "sim/system.hh" +#include "arch/tlb.hh" +#include "arch/vtophys.hh" +#else // !FULL_SYSTEM +#include "mem/functional/functional.hh" +#endif // FULL_SYSTEM + +class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl> +{ + public: + DerivAlphaFullCPU(AlphaSimpleParams p) + : AlphaFullCPU<AlphaSimpleImpl>(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + Param<int> clock; + Param<int> numThreads; + +#if FULL_SYSTEM +SimObjectParam<System *> system; +Param<int> cpu_id; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +#else +SimObjectVectorParam<Process *> workload; +#endif // FULL_SYSTEM +SimObjectParam<FunctionalMemory *> mem; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +#if 0 +Param<unsigned> localPredictorSize; +Param<unsigned> localPredictorCtrBits; +#endif +Param<unsigned> local_predictor_size; +Param<unsigned> local_ctr_bits; +Param<unsigned> local_history_table_size; +Param<unsigned> local_history_bits; +Param<unsigned> global_predictor_size; +Param<unsigned> global_ctr_bits; +Param<unsigned> global_history_bits; +Param<unsigned> choice_predictor_size; +Param<unsigned> choice_ctr_bits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<unsigned> instShiftAmt; + +Param<bool> defer_registration; + +Param<bool> function_trace; +Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + +#if 0 + INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. " + "Must be a power of 2."), + INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), +#endif + INIT_PARAM(local_predictor_size, "Size of local predictor"), + INIT_PARAM(local_ctr_bits, "Bits per counter"), + INIT_PARAM(local_history_table_size, "Size of local history table"), + INIT_PARAM(local_history_bits, "Bits for the local history"), + INIT_PARAM(global_predictor_size, "Size of global predictor"), + INIT_PARAM(global_ctr_bits, "Bits per counter"), + INIT_PARAM(global_history_bits, "Bits of history"), + INIT_PARAM(choice_predictor_size, "Size of choice predictor"), + INIT_PARAM(choice_ctr_bits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + +CREATE_SIM_OBJECT(DerivAlphaFullCPU) +{ + DerivAlphaFullCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + AlphaSimpleParams params; + + params.clock = clock; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params.system = system; + params.cpu_id = cpu_id; + params.itb = itb; + params.dtb = dtb; +#else + params.workload = workload; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.max_insts_any_thread = max_insts_any_thread; + params.max_insts_all_threads = max_insts_all_threads; + params.max_loads_any_thread = max_loads_any_thread; + params.max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + params.executeBranchWidth = executeBranchWidth; + params.executeMemoryWidth = executeMemoryWidth; + + params.iewToCommitDelay = iewToCommitDelay; + params.renameToROBDelay = renameToROBDelay; + params.commitWidth = commitWidth; + params.squashWidth = squashWidth; +#if 0 + params.localPredictorSize = localPredictorSize; + params.localPredictorCtrBits = localPredictorCtrBits; +#endif + params.local_predictor_size = local_predictor_size; + params.local_ctr_bits = local_ctr_bits; + params.local_history_table_size = local_history_table_size; + params.local_history_bits = local_history_bits; + params.global_predictor_size = global_predictor_size; + params.global_ctr_bits = global_ctr_bits; + params.global_history_bits = global_history_bits; + params.choice_predictor_size = choice_predictor_size; + params.choice_ctr_bits = choice_ctr_bits; + + params.BTBEntries = BTBEntries; + params.BTBTagSize = BTBTagSize; + + params.RASSize = RASSize; + + params.LQEntries = LQEntries; + params.SQEntries = SQEntries; + params.SSITSize = SSITSize; + params.LFSTSize = LFSTSize; + + params.numPhysIntRegs = numPhysIntRegs; + params.numPhysFloatRegs = numPhysFloatRegs; + params.numIQEntries = numIQEntries; + params.numROBEntries = numROBEntries; + + params.instShiftAmt = 2; + + params.defReg = defer_registration; + + params.functionTrace = function_trace; + params.functionTraceStart = function_trace_start; + + cpu = new DerivAlphaFullCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) + diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh new file mode 100644 index 000000000..7c4c2b969 --- /dev/null +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/alpha/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/comm.hh" + +#if FULL_SYSTEM +#include "arch/alpha/osfpal.hh" +#include "arch/alpha/isa_traits.hh" +#endif + +template <class Impl> +AlphaFullCPU<Impl>::AlphaFullCPU(Params ¶ms) + : FullO3CPU<Impl>(params) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); +} + +template <class Impl> +void +AlphaFullCPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + +#if !FULL_SYSTEM + +// Will probably need to know which thread is calling syscall +// Will need to pass that information in to the DynInst when it is constructed, +// so that this call can be made with the proper thread number. +template <class Impl> +void +AlphaFullCPU<Impl>::syscall(short thread_num) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + // Commit stage needs to run as well. + this->commit.tick(); + + squashStages(); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->funcExeInst); + + // Copy over all important state to xc once all the unrolling is done. + copyToXC(); + + // This is hardcoded to thread 0 while the CPU is only single threaded. + this->thread[0]->syscall(); + + // Copy over all important state back to CPU. + copyFromXC(); + + // Decrease funcExeInst by one as the normal commit will handle + // incrememnting it. + --(this->funcExeInst); +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls). Maybe put this at the FullCPU level? +template <class Impl> +void +AlphaFullCPU<Impl>::squashStages() +{ + InstSeqNum rob_head = this->rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it. + for (int i = 0; i < 5; ++i) + { + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + this->fetch.squash(this->rob.readHeadNextPC()); + this->fetchQueue.advance(); + + this->decode.squash(); + this->decodeQueue.advance(); + + this->rename.squash(); + this->renameQueue.advance(); + this->renameQueue.advance(); + + // Be sure to advance the IEW queues so that the commit stage doesn't + // try to set an instruction as completed at the same time that it + // might be deleting it. + this->iew.squash(); + this->iewQueue.advance(); + this->iewQueue.advance(); + // Needs to tell the LSQ to write back all of its data + this->iew.lsqWriteback(); + + this->rob.squash(rob_head); + this->commit.setSquashing(); + + // Now hack the time buffer to clear the sequence numbers in the places + // where the stages might read it.? + for (int i = 0; i < 5; ++i) + { + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; + } + +} + +#endif // FULL_SYSTEM + +template <class Impl> +void +AlphaFullCPU<Impl>::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i); + this->cpuXC->setIntReg(i, this->regFile.readIntReg(renamed_reg)); + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, this->regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->cpuXC->setFloatRegBits(i, + this->regFile.readFloatRegBits(renamed_reg)); + } + + this->cpuXC->setMiscReg(AlphaISA::Fpcr_DepTag, + this->regFile.readMiscReg(AlphaISA::Fpcr_DepTag)); + this->cpuXC->setMiscReg(AlphaISA::Uniq_DepTag, + this->regFile.readMiscReg(AlphaISA::Uniq_DepTag)); + this->cpuXC->setMiscReg(AlphaISA::Lock_Flag_DepTag, + this->regFile.readMiscReg(AlphaISA::Lock_Flag_DepTag)); + this->cpuXC->setMiscReg(AlphaISA::Lock_Addr_DepTag, + this->regFile.readMiscReg(AlphaISA::Lock_Addr_DepTag)); + + this->cpuXC->setPC(this->rob.readHeadPC()); + this->cpuXC->setNextPC(this->cpuXC->readPC()+4); + +#if !FULL_SYSTEM + this->cpuXC->setFuncExeInst(this->funcExeInst); +#endif +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +template <class Impl> +void +AlphaFullCPU<Impl>::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, this->regFile.intRegFile[renamed_reg], + this->cpuXC->readIntReg(i)); + + this->regFile.setIntReg(renamed_reg, this->cpuXC->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->regFile.setFloatRegBits(renamed_reg, + this->cpuXC->readFloatRegBits(i)); + } + + // Then loop through the misc registers. + this->regFile.setMiscReg(AlphaISA::Fpcr_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Fpcr_DepTag)); + this->regFile.setMiscReg(AlphaISA::Uniq_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Uniq_DepTag)); + this->regFile.setMiscReg(AlphaISA::Lock_Flag_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Lock_Flag_DepTag)); + this->regFile.setMiscReg(AlphaISA::Lock_Addr_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Lock_Addr_DepTag)); + + // Then finally set the PC and the next PC. +// regFile.pc = cpuXC->regs.pc; +// regFile.npc = cpuXC->regs.npc; +#if !FULL_SYSTEM + this->funcExeInst = this->cpuXC->readFuncExeInst(); +#endif +} + +#if FULL_SYSTEM + +template <class Impl> +int +AlphaFullCPU<Impl>::readIntrFlag() +{ + return this->regFile.readIntrFlag(); +} + +template <class Impl> +void +AlphaFullCPU<Impl>::setIntrFlag(int val) +{ + this->regFile.setIntrFlag(val); +} + +// Can force commit stage to squash and stuff. +template <class Impl> +Fault +AlphaFullCPU<Impl>::hwrei() +{ + if (!inPalMode()) + return new AlphaISA::UnimplementedOpcodeFault; + + this->setNextPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR)); + +// kernelStats.hwrei(); + + if ((this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR) & 1) == 0) +// AlphaISA::swap_palshadow(®s, false); + + this->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template <class Impl> +bool +AlphaFullCPU<Impl>::simPalCheck(int palFunc) +{ +// kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +template <class Impl> +void +AlphaFullCPU<Impl>::trap(Fault fault) +{ +/* // Keep in mind that a trap may be initiated by fetch if there's a TLB + // miss + uint64_t PC = this->commit.readCommitPC(); + + DPRINTF(Fault, "Fault %s\n", fault->name()); + this->recordEvent(csprintf("Fault %s", fault->name())); + + //kernelStats.fault(fault); + + if (fault->isA<ArithmeticFault>()) + panic("Arithmetic traps are unimplemented!"); + + // exception restart address - Get the commit PC + if (!fault->isA<InterruptFault>() || !inPalMode(PC)) + this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, PC); + + if (fault->isA<PalFault>() || fault->isA<ArithmeticFault>()) + // || fault == InterruptFault && !PC_PAL(regs.pc) + { + // traps... skip faulting instruction + AlphaISA::MiscReg ipr_exc_addr = + this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR); + this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, + ipr_exc_addr + 4); + } + + if (!inPalMode(PC)) + swapPALShadow(true); + + this->regFile.setPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_PAL_BASE) + + (dynamic_cast<AlphaFault *>(fault.get()))->vect()); + this->regFile.setNextPC(PC + sizeof(MachInst));*/ +} + +template <class Impl> +void +AlphaFullCPU<Impl>::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +template <class Impl> +void +AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. +} + +#endif // FULL_SYSTEM diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha_dyn_inst.cc new file mode 100644 index 000000000..72ac77d95 --- /dev/null +++ b/src/cpu/o3/alpha_dyn_inst.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst_impl.hh" +#include "cpu/o3/alpha_impl.hh" + +// Force instantiation of AlphaDynInst for all the implementations that +// are needed. +template class AlphaDynInst<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh new file mode 100644 index 000000000..5b8a05e5c --- /dev/null +++ b/src/cpu/o3/alpha_dyn_inst.hh @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ALPHA_DYN_INST_HH__ +#define __CPU_O3_CPU_ALPHA_DYN_INST_HH__ + +#include "cpu/base_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/inst_seq.hh" + +/** + * Mostly implementation specific AlphaDynInst. It is templated in case there + * are other implementations that are similar enough to be able to use this + * class without changes. This is mainly useful if there are multiple similar + * CPU implementations of the same ISA. + */ + +template <class Impl> +class AlphaDynInst : public BaseDynInst<Impl> +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::FullCPU FullCPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + AlphaDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ + Fault execute() + { + return this->fault = this->staticInst->execute(this, this->traceData); + } + + public: + MiscReg readMiscReg(int misc_reg) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return 0; + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return 0; + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return NoFault; + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return NoFault; + } + +#if FULL_SYSTEM + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + + + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); + this->instResult.fp = val; + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); + this->instResult.dbl = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + this->instResult.integer = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_O3_CPU_ALPHA_DYN_INST_HH__ + diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh new file mode 100644 index 000000000..96b7d3430 --- /dev/null +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" + +template <class Impl> +AlphaDynInst<Impl>::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } + +} + +template <class Impl> +AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr &_staticInst) + : BaseDynInst<Impl>(_staticInst) +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < _staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = _staticInst->destRegIdx(i); + } + + for (int i = 0; i < _staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = _staticInst->srcRegIdx(i); + } +} + +#if FULL_SYSTEM +template <class Impl> +Fault +AlphaDynInst<Impl>::hwrei() +{ + return this->cpu->hwrei(); +} + +template <class Impl> +int +AlphaDynInst<Impl>::readIntrFlag() +{ +return this->cpu->readIntrFlag(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::setIntrFlag(int val) +{ + this->cpu->setIntrFlag(val); +} + +template <class Impl> +bool +AlphaDynInst<Impl>::inPalMode() +{ + return this->cpu->inPalMode(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::trap(Fault fault) +{ + this->cpu->trap(fault); +} + +template <class Impl> +bool +AlphaDynInst<Impl>::simPalCheck(int palFunc) +{ + return this->cpu->simPalCheck(palFunc); +} +#else +template <class Impl> +void +AlphaDynInst<Impl>::syscall() +{ + this->cpu->syscall(this->threadNumber); +} +#endif + diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha_impl.hh new file mode 100644 index 000000000..5e39fcb37 --- /dev/null +++ b/src/cpu/o3/alpha_impl.hh @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ALPHA_IMPL_HH__ +#define __CPU_O3_CPU_ALPHA_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/cpu_policy.hh" + +// Forward declarations. +template <class Impl> +class AlphaDynInst; + +template <class Impl> +class AlphaFullCPU; + +/** Implementation specific struct that defines several key things to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific FullCPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct AlphaSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + + /** The CPU policy to be used (ie fetch, decode, etc.). */ + typedef SimpleCPUPolicy<AlphaSimpleImpl> CPUPol; + + /** The DynInst to be used. */ + typedef AlphaDynInst<AlphaSimpleImpl> DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr<DynInst> DynInstPtr; + + /** The FullCPU to be used. */ + typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU; + + /** The Params to be passed to each stage. */ + typedef AlphaSimpleParams Params; + + enum { + MaxWidth = 8 + }; +}; + +#endif // __CPU_O3_CPU_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh new file mode 100644 index 000000000..79b0937e3 --- /dev/null +++ b/src/cpu/o3/alpha_params.hh @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__ +#define __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" + +//Forward declarations +class System; +class AlphaITB; +class AlphaDTB; +class FunctionalMemory; +class Process; +class MemInterface; + +/** + * This file defines the parameters that will be used for the AlphaFullCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams : public BaseFullCPU::Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; AlphaDTB *dtb; +#else + std::vector<Process *> workload; + Process *process; +#endif // FULL_SYSTEM + + FunctionalMemory *mem; + + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + // + // Fetch + // + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + // + // Decode + // + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + // + // Rename + // + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + // + // IEW + // + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; + + // + // Commit + // + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; + + // + // Branch predictor (BP & BTB) + // +/* + unsigned localPredictorSize; + unsigned localPredictorCtrBits; +*/ + + unsigned local_predictor_size; + unsigned local_ctr_bits; + unsigned local_history_table_size; + unsigned local_history_bits; + unsigned global_predictor_size; + unsigned global_ctr_bits; + unsigned global_history_bits; + unsigned choice_predictor_size; + unsigned choice_ctr_bits; + + unsigned BTBEntries; + unsigned BTBTagSize; + + unsigned RASSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Memory dependence + // + unsigned SSITSize; + unsigned LFSTSize; + + // + // Miscellaneous + // + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + + // Probably can get this from somewhere. + unsigned instShiftAmt; + + bool defReg; +}; + +#endif // __CPU_O3_CPU_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc new file mode 100644 index 000000000..85bd6f0a6 --- /dev/null +++ b/src/cpu/o3/bpred_unit.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/bpred_unit_impl.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_dyn_inst.hh" + +template class TwobitBPredUnit<AlphaSimpleImpl>; diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh new file mode 100644 index 000000000..2725684f7 --- /dev/null +++ b/src/cpu/o3/bpred_unit.hh @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BPRED_UNIT_HH__ +#define __BPRED_UNIT_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + +#include "cpu/o3/2bit_local_pred.hh" +#include "cpu/o3/tournament_pred.hh" +#include "cpu/o3/btb.hh" +#include "cpu/o3/ras.hh" + +#include <list> + +/** + * Basically a wrapper class to hold both the branch predictor + * and the BTB. Right now I'm unsure of the implementation; it would + * be nicer to have something closer to the CPUPolicy or the Impl where + * this is just typedefs, but it forces the upper level stages to be + * aware of the constructors of the BP and the BTB. The nicer thing + * to do is have this templated on the Impl, accept the usual Params + * object, and be able to call the constructors on the BP and BTB. + */ +template<class Impl> +class TwobitBPredUnit +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + TwobitBPredUnit(Params ¶ms); + + void regStats(); + + bool predict(DynInstPtr &inst, Addr &PC); + + void update(const InstSeqNum &done_sn); + + void squash(const InstSeqNum &squashed_sn); + + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken); + + bool BPLookup(Addr &inst_PC) + { return BP.lookup(inst_PC); } + + bool BTBValid(Addr &inst_PC) + { return BTB.valid(inst_PC); } + + Addr BTBLookup(Addr &inst_PC) + { return BTB.lookup(inst_PC); } + + // Will want to include global history. + void BPUpdate(Addr &inst_PC, bool taken) + { BP.update(inst_PC, taken); } + + void BTBUpdate(Addr &inst_PC, Addr &target_PC) + { BTB.update(inst_PC, target_PC); } + + private: + struct PredictorHistory { + PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, + const bool pred_taken) + : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken), + globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0), + RASTarget(0) + { } + + InstSeqNum seqNum; + + Addr PC; + + bool predTaken; + + unsigned globalHistory; + + bool usedRAS; + + bool wasCall; + + unsigned RASIndex; + + Addr RASTarget; + }; + + std::list<PredictorHistory> predHist; + + DefaultBP BP; + + DefaultBTB BTB; + + ReturnAddrStack RAS; + + Stats::Scalar<> lookups; + Stats::Scalar<> condPredicted; + Stats::Scalar<> condIncorrect; + Stats::Scalar<> BTBLookups; + Stats::Scalar<> BTBHits; + Stats::Scalar<> BTBCorrect; + Stats::Scalar<> usedRAS; + Stats::Scalar<> RASIncorrect; +}; + +#endif // __BPRED_UNIT_HH__ diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh new file mode 100644 index 000000000..8d16a0cdf --- /dev/null +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/bpred_unit.hh" + +template<class Impl> +TwobitBPredUnit<Impl>::TwobitBPredUnit(Params ¶ms) + : BP(params.local_predictor_size, + params.local_ctr_bits, + params.instShiftAmt), + BTB(params.BTBEntries, + params.BTBTagSize, + params.instShiftAmt), + RAS(params.RASSize) +{ +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::regStats() +{ + lookups + .name(name() + ".BPredUnit.lookups") + .desc("Number of BP lookups") + ; + + condPredicted + .name(name() + ".BPredUnit.condPredicted") + .desc("Number of conditional branches predicted") + ; + + condIncorrect + .name(name() + ".BPredUnit.condIncorrect") + .desc("Number of conditional branches incorrect") + ; + + BTBLookups + .name(name() + ".BPredUnit.BTBLookups") + .desc("Number of BTB lookups") + ; + + BTBHits + .name(name() + ".BPredUnit.BTBHits") + .desc("Number of BTB hits") + ; + + BTBCorrect + .name(name() + ".BPredUnit.BTBCorrect") + .desc("Number of correct BTB predictions (this stat may not " + "work properly.") + ; + + usedRAS + .name(name() + ".BPredUnit.usedRAS") + .desc("Number of times the RAS was used.") + ; + + RASIncorrect + .name(name() + ".BPredUnit.RASInCorrect") + .desc("Number of incorrect RAS predictions.") + ; +} + +template <class Impl> +bool +TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Once that's done, speculatively update the predictor? + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. + + using TheISA::MachInst; + + bool pred_taken = false; + Addr target; + + ++lookups; + + if (inst->isUncondCtrl()) { + DPRINTF(Fetch, "BranchPred: Unconditional control.\n"); + pred_taken = true; + } else { + ++condPredicted; + + pred_taken = BPLookup(PC); + + DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x" + "\n", pred_taken, inst->readPC()); + } + + PredictorHistory predict_record(inst->seqNum, PC, pred_taken); + + // Now lookup in the BTB or RAS. + if (pred_taken) { + if (inst->isReturn()) { + ++usedRAS; + + // If it's a function return call, then look up the address + // in the RAS. + target = RAS.top(); + + // Record the top entry of the RAS, and its index. + predict_record.usedRAS = true; + predict_record.RASIndex = RAS.topIdx(); + predict_record.RASTarget = target; + + RAS.pop(); + + DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS " + "predicted target: %#x, RAS index: %i.\n", + inst->readPC(), target, predict_record.RASIndex); + } else { + ++BTBLookups; + + if (inst->isCall()) { + RAS.push(PC+sizeof(MachInst)); + + // Record that it was a call so that the top RAS entry can + // be popped off if the speculation is incorrect. + predict_record.wasCall = true; + + DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, " + "adding %#x to the RAS.\n", + inst->readPC(), PC+sizeof(MachInst)); + } + + if (BTB.valid(PC)) { + ++BTBHits; + + //If it's anything else, use the BTB to get the target addr. + target = BTB.lookup(PC); + + DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target " + "is %#x.\n", inst->readPC(), target); + + } else { + DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry." + "\n"); + pred_taken = false; + } + + } + } + + if (pred_taken) { + // Set the PC and the instruction's predicted target. + PC = target; + inst->setPredTarg(target); + } else { + PC = PC + sizeof(MachInst); + inst->setPredTarg(PC); + } + + predHist.push_front(predict_record); + + assert(!predHist.empty()); + + return pred_taken; +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn) +{ + DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number " + "%i.\n", done_sn); + + while (!predHist.empty() && predHist.back().seqNum <= done_sn) { + assert(!predHist.empty()); + + // Update the branch predictor with the correct results of branches. + BP.update(predHist.back().PC, predHist.back().predTaken); + + predHist.pop_back(); + } +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn) +{ + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, + const bool actually_taken) +{ + // Now that we know that a branch was mispredicted, we need to undo + // all the branches that have been seen up until this branch and + // fix up everything. + + ++condIncorrect; + + DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, " + "setting target to %#x.\n", + squashed_sn, corr_target); + + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } + + predHist.front().predTaken = actually_taken; + + if (predHist.front().usedRAS) { + ++RASIncorrect; + } + + BP.update(predHist.front().PC, actually_taken); + + BTB.update(predHist.front().PC, corr_target); +} diff --git a/src/cpu/o3/btb.cc b/src/cpu/o3/btb.cc new file mode 100644 index 000000000..2d39c3856 --- /dev/null +++ b/src/cpu/o3/btb.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/intmath.hh" +#include "base/trace.hh" +#include "cpu/o3/btb.hh" + +using namespace TheISA; + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _tagBits, + unsigned _instShiftAmt) + : numEntries(_numEntries), + tagBits(_tagBits), + instShiftAmt(_instShiftAmt) +{ + // @todo Check to make sure num_entries is valid (a power of 2) + + DPRINTF(Fetch, "BTB: Creating BTB object.\n"); + + btb = new BTBEntry[numEntries]; + + for (int i = 0; i < numEntries; ++i) + { + btb[i].valid = false; + } + + idxMask = numEntries - 1; + + tagMask = (1 << tagBits) - 1; + + tagShiftAmt = instShiftAmt + floorLog2(numEntries); +} + +inline +unsigned +DefaultBTB::getIndex(const Addr &inst_PC) +{ + // Need to shift PC over by the word offset. + return (inst_PC >> instShiftAmt) & idxMask; +} + +inline +Addr +DefaultBTB::getTag(const Addr &inst_PC) +{ + return (inst_PC >> tagShiftAmt) & tagMask; +} + +bool +DefaultBTB::valid(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return true; + } else { + return false; + } +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr +DefaultBTB::lookup(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return btb[btb_idx].target; + } else { + return 0; + } +} + +void +DefaultBTB::update(const Addr &inst_PC, const Addr &target) +{ + unsigned btb_idx = getIndex(inst_PC); + + assert(btb_idx < numEntries); + + btb[btb_idx].valid = true; + btb[btb_idx].target = target; + btb[btb_idx].tag = getTag(inst_PC); +} diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh new file mode 100644 index 000000000..77bdc32ea --- /dev/null +++ b/src/cpu/o3/btb.hh @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_BTB_HH__ +#define __CPU_O3_CPU_BTB_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" + +class DefaultBTB +{ + private: + struct BTBEntry + { + BTBEntry() + : tag(0), target(0), valid(false) + { + } + + Addr tag; + Addr target; + bool valid; + }; + + public: + DefaultBTB(unsigned numEntries, unsigned tagBits, + unsigned instShiftAmt); + + Addr lookup(const Addr &inst_PC); + + bool valid(const Addr &inst_PC); + + void update(const Addr &inst_PC, const Addr &target_PC); + + private: + inline unsigned getIndex(const Addr &inst_PC); + + inline Addr getTag(const Addr &inst_PC); + + BTBEntry *btb; + + unsigned numEntries; + + unsigned idxMask; + + unsigned tagBits; + + unsigned tagMask; + + unsigned instShiftAmt; + + unsigned tagShiftAmt; +}; + +#endif // __CPU_O3_CPU_BTB_HH__ diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh new file mode 100644 index 000000000..c74c77ddf --- /dev/null +++ b/src/cpu/o3/comm.hh @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_COMM_HH__ +#define __CPU_O3_CPU_COMM_HH__ + +#include <vector> + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + +// Find better place to put this typedef. +// The impl might be the best place for this. +typedef short int PhysRegIndex; + +template<class Impl> +struct SimpleFetchSimpleDecode { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleDecodeSimpleRename { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleRenameSimpleIEW { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleIEWSimpleCommit { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; + + bool squash; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + InstSeqNum squashedSeqNum; +}; + +template<class Impl> +struct IssueStruct { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +struct TimeBufStruct { + struct decodeComm { + bool squash; + bool stall; + bool predIncorrect; + uint64_t branchAddr; + + InstSeqNum doneSeqNum; + + // Might want to package this kind of branch stuff into a single + // struct as it is used pretty frequently. + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + }; + + decodeComm decodeInfo; + + // Rename can't actually tell anything to squash or send a new PC back + // because it doesn't do anything along those lines. But maybe leave + // these fields in here to keep the stages mostly orthagonal. + struct renameComm { + bool squash; + bool stall; + + uint64_t nextPC; + }; + + renameComm renameInfo; + + struct iewComm { + bool stall; + + // Also eventually include skid buffer space. + unsigned freeIQEntries; + }; + + iewComm iewInfo; + + struct commitComm { + bool squash; + bool stall; + unsigned freeROBEntries; + + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + + bool robSquashing; + + // Represents the instruction that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed sequence number. + InstSeqNum doneSeqNum; + + // Extra bit of information so that the LDSTQ only updates when it + // needs to. + bool commitIsLoad; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. + InstSeqNum nonSpecSeqNum; + }; + + commitComm commitInfo; +}; + +#endif //__CPU_O3_CPU_COMM_HH__ diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc new file mode 100644 index 000000000..cf33d7f8b --- /dev/null +++ b/src/cpu/o3/commit.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/commit_impl.hh" + +template class SimpleCommit<AlphaSimpleImpl>; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh new file mode 100644 index 000000000..580c1a316 --- /dev/null +++ b/src/cpu/o3/commit.hh @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Maybe have a special method for handling interrupts/traps. +// +// Traps: Have IEW send a signal to commit saying that there's a trap to +// be handled. Have commit send the PC back to the fetch stage, along +// with the current commit PC. Fetch will directly access the IPR and save +// off all the proper stuff. Commit can send out a squash, or something +// close to it. +// Do the same for hwrei(). However, requires that commit be specifically +// built to support that kind of stuff. Probably not horrible to have +// commit support having the CPU tell it to squash the other stages and +// restart at a given address. The IPR register does become an issue. +// Probably not a big deal if the IPR stuff isn't cycle accurate. Can just +// have the original function handle writing to the IPR register. + +#ifndef __CPU_O3_CPU_SIMPLE_COMMIT_HH__ +#define __CPU_O3_CPU_SIMPLE_COMMIT_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "mem/memory_interface.hh" + +template<class Impl> +class SimpleCommit +{ + public: + // Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + typedef typename CPUPol::ROB ROB; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + + public: + // I don't believe commit can block, so it will only have two + // statuses for now. + // Actually if there's a cache access that needs to block (ie + // uncachable load or just a mem access in commit) then the stage + // may have to wait. + enum Status { + Running, + Idle, + ROBSquashing, + DcacheMissStall, + DcacheMissComplete + }; + + private: + Status _status; + + public: + SimpleCommit(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); + + void setROB(ROB *rob_ptr); + + void tick(); + + void commit(); + + private: + + void commitInsts(); + + bool commitHead(DynInstPtr &head_inst, unsigned inst_num); + + void getInsts(); + + void markCompletedInsts(); + + public: + uint64_t readCommitPC(); + + void setSquashing() { _status = ROBSquashing; } + + private: + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer<TimeStruct>::wire toIEW; + + /** Wire to read information from IEW (for ROB). */ + typename TimeBuffer<TimeStruct>::wire robInfoFromIEW; + + /** IEW instruction queue interface. */ + TimeBuffer<IEWStruct> *iewQueue; + + /** Wire to read information from IEW queue. */ + typename TimeBuffer<IEWStruct>::wire fromIEW; + + /** Rename instruction queue interface, for ROB. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to read information from rename queue. */ + typename TimeBuffer<RenameStruct>::wire fromRename; + + /** ROB interface. */ + ROB *rob; + + /** Pointer to FullCPU. */ + FullCPU *cpu; + + /** Memory interface. Used for d-cache accesses. */ + MemInterface *dcacheInterface; + + private: + /** IEW to Commit delay, in ticks. */ + unsigned iewToCommitDelay; + + /** Rename to ROB delay, in ticks. */ + unsigned renameToROBDelay; + + /** Rename width, in instructions. Used so ROB knows how many + * instructions to get from the rename instruction queue. + */ + unsigned renameWidth; + + /** IEW width, in instructions. Used so ROB knows how many + * instructions to get from the IEW instruction queue. + */ + unsigned iewWidth; + + /** Commit width, in instructions. */ + unsigned commitWidth; + + Stats::Scalar<> commitCommittedInsts; + Stats::Scalar<> commitSquashedInsts; + Stats::Scalar<> commitSquashEvents; + Stats::Scalar<> commitNonSpecStalls; + Stats::Scalar<> commitCommittedBranches; + Stats::Scalar<> commitCommittedLoads; + Stats::Scalar<> commitCommittedMemRefs; + Stats::Scalar<> branchMispredicts; + + Stats::Distribution<> n_committed_dist; +}; + +#endif // __CPU_O3_CPU_SIMPLE_COMMIT_HH__ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh new file mode 100644 index 000000000..e289bc0c0 --- /dev/null +++ b/src/cpu/o3/commit_impl.hh @@ -0,0 +1,502 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/timebuf.hh" +#include "cpu/o3/commit.hh" +#include "cpu/exetrace.hh" + +template <class Impl> +SimpleCommit<Impl>::SimpleCommit(Params ¶ms) + : dcacheInterface(params.dcacheInterface), + iewToCommitDelay(params.iewToCommitDelay), + renameToROBDelay(params.renameToROBDelay), + renameWidth(params.renameWidth), + iewWidth(params.executeWidth), + commitWidth(params.commitWidth) +{ + _status = Idle; +} + +template <class Impl> +void +SimpleCommit<Impl>::regStats() +{ + commitCommittedInsts + .name(name() + ".commitCommittedInsts") + .desc("The number of committed instructions") + .prereq(commitCommittedInsts); + commitSquashedInsts + .name(name() + ".commitSquashedInsts") + .desc("The number of squashed insts skipped by commit") + .prereq(commitSquashedInsts); + commitSquashEvents + .name(name() + ".commitSquashEvents") + .desc("The number of times commit is told to squash") + .prereq(commitSquashEvents); + commitNonSpecStalls + .name(name() + ".commitNonSpecStalls") + .desc("The number of times commit has been forced to stall to " + "communicate backwards") + .prereq(commitNonSpecStalls); + commitCommittedBranches + .name(name() + ".commitCommittedBranches") + .desc("The number of committed branches") + .prereq(commitCommittedBranches); + commitCommittedLoads + .name(name() + ".commitCommittedLoads") + .desc("The number of committed loads") + .prereq(commitCommittedLoads); + commitCommittedMemRefs + .name(name() + ".commitCommittedMemRefs") + .desc("The number of committed memory references") + .prereq(commitCommittedMemRefs); + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("The number of times a branch was mispredicted") + .prereq(branchMispredicts); + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(Stats::pdf) + ; +} + +template <class Impl> +void +SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template <class Impl> +void +SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Commit, "Commit: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to send information back to IEW. + toIEW = timeBuffer->getWire(0); + + // Setup wire to read data from IEW (for the ROB). + robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(Commit, "Commit: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to get instructions from rename (for the ROB). + fromRename = renameQueue->getWire(-renameToROBDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) +{ + DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to get instructions from IEW. + fromIEW = iewQueue->getWire(-iewToCommitDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setROB(ROB *rob_ptr) +{ + DPRINTF(Commit, "Commit: Setting ROB pointer.\n"); + rob = rob_ptr; +} + +template <class Impl> +void +SimpleCommit<Impl>::tick() +{ + // If the ROB is currently in its squash sequence, then continue + // to squash. In this case, commit does not do anything. Otherwise + // run commit. + if (_status == ROBSquashing) { + if (rob->isDoneSquashing()) { + _status = Running; + } else { + rob->doSquash(); + + // Send back sequence number of tail of ROB, so other stages + // can squash younger instructions. Note that really the only + // stage that this is important for is the IEW stage; other + // stages can just clear all their state as long as selective + // replay isn't used. + toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum(); + toIEW->commitInfo.robSquashing = true; + } + } else { + commit(); + } + + markCompletedInsts(); + + // Writeback number of free ROB entries here. + DPRINTF(Commit, "Commit: ROB has %d free entries.\n", + rob->numFreeEntries()); + toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); +} + +template <class Impl> +void +SimpleCommit<Impl>::commit() +{ + ////////////////////////////////////// + // Check for interrupts + ////////////////////////////////////// + + // Process interrupts if interrupts are enabled and not in PAL mode. + // Take the PC from commit and write it to the IPR, then squash. The + // interrupt completing will take care of restoring the PC from that value + // in the IPR. Look at IPR[EXC_ADDR]; + // hwrei() is what resets the PC to the place where instruction execution + // beings again. +#if FULL_SYSTEM + if (//checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode(readCommitPC())) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + } +#endif // FULL_SYSTEM + + //////////////////////////////////// + // Check for squash signal, handle that first + //////////////////////////////////// + + // Want to mainly check if the IEW stage is telling the ROB to squash. + // Should I also check if the commit stage is telling the ROB to squah? + // This might be necessary to keep the same timing between the IQ and + // the ROB... + if (fromIEW->squash) { + DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n"); + + _status = ROBSquashing; + + InstSeqNum squashed_inst = fromIEW->squashedSeqNum; + + rob->squash(squashed_inst); + + // Send back the sequence number of the squashed instruction. + toIEW->commitInfo.doneSeqNum = squashed_inst; + + // Send back the squash signal to tell stages that they should squash. + toIEW->commitInfo.squash = true; + + // Send back the rob squashing signal so other stages know that the + // ROB is in the process of squashing. + toIEW->commitInfo.robSquashing = true; + + toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict; + + toIEW->commitInfo.branchTaken = fromIEW->branchTaken; + + toIEW->commitInfo.nextPC = fromIEW->nextPC; + + toIEW->commitInfo.mispredPC = fromIEW->mispredPC; + + if (toIEW->commitInfo.branchMispredict) { + ++branchMispredicts; + } + } + + if (_status != ROBSquashing) { + // If we're not currently squashing, then get instructions. + getInsts(); + + // Try to commit any instructions. + commitInsts(); + } + + // If the ROB is empty, we can set this stage to idle. Use this + // in the future when the Idle status will actually be utilized. +#if 0 + if (rob->isEmpty()) { + DPRINTF(Commit, "Commit: ROB is empty. Status changed to idle.\n"); + _status = Idle; + // Schedule an event so that commit will actually wake up + // once something gets put in the ROB. + } +#endif +} + +// Loop that goes through as many instructions in the ROB as possible and +// tries to commit them. The actual work for committing is done by the +// commitHead() function. +template <class Impl> +void +SimpleCommit<Impl>::commitInsts() +{ + //////////////////////////////////// + // Handle commit + // Note that commit will be handled prior to the ROB so that the ROB + // only tries to commit instructions it has in this current cycle, and + // not instructions it is writing in during this cycle. + // Can't commit and squash things at the same time... + //////////////////////////////////// + + if (rob->isEmpty()) + return; + + DynInstPtr head_inst = rob->readHeadInst(); + + unsigned num_committed = 0; + + // Commit as many instructions as possible until the commit bandwidth + // limit is reached, or it becomes impossible to commit any more. + while (!rob->isEmpty() && + head_inst->readyToCommit() && + num_committed < commitWidth) + { + DPRINTF(Commit, "Commit: Trying to commit head instruction.\n"); + + // If the head instruction is squashed, it is ready to retire at any + // time. However, we need to avoid updating any other state + // incorrectly if it's already been squashed. + if (head_inst->isSquashed()) { + + DPRINTF(Commit, "Commit: Retiring squashed instruction from " + "ROB.\n"); + + // Tell ROB to retire head instruction. This retires the head + // inst in the ROB without affecting any other stages. + rob->retireHead(); + + ++commitSquashedInsts; + + } else { + // Increment the total number of non-speculative instructions + // executed. + // Hack for now: it really shouldn't happen until after the + // commit is deemed to be successful, but this count is needed + // for syscalls. + cpu->funcExeInst++; + + // Try to commit the head instruction. + bool commit_success = commitHead(head_inst, num_committed); + + // Update what instruction we are looking at if the commit worked. + if (commit_success) { + ++num_committed; + + // Send back which instruction has been committed. + // @todo: Update this later when a wider pipeline is used. + // Hmm, can't really give a pointer here...perhaps the + // sequence number instead (copy). + toIEW->commitInfo.doneSeqNum = head_inst->seqNum; + + ++commitCommittedInsts; + + if (!head_inst->isNop()) { + cpu->instDone(); + } + } else { + break; + } + } + + // Update the pointer to read the next instruction in the ROB. + head_inst = rob->readHeadInst(); + } + + DPRINTF(CommitRate, "%i\n", num_committed); + n_committed_dist.sample(num_committed); +} + +template <class Impl> +bool +SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) +{ + // Make sure instruction is valid + assert(head_inst); + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!head_inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. + cpu->funcExeInst--; + + if (head_inst->isNonSpeculative()) { + DPRINTF(Commit, "Commit: Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + head_inst->readPC()); + + toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + head_inst->clearCanCommit(); + + ++commitNonSpecStalls; + + return false; + } else { + panic("Commit: Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (head_inst->isThreadSync() || + head_inst->isSerializing() || + head_inst->isMemBarrier() || + head_inst->isWriteBarrier() ) + { + // Not handled for now. Mem barriers and write barriers are safe + // to simply let commit as memory accesses only happen once they + // reach the head of commit. Not sure about the other two. + panic("Serializing or barrier instructions" + " are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = head_inst->getFault(); + + if (inst_fault != NoFault) { + if (!head_inst->isNop()) { +#if FULL_SYSTEM + cpu->trap(inst_fault); +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + head_inst->PC); +#endif // FULL_SYSTEM + } + } + + // Check if we're really ready to commit. If not then return false. + // I'm pretty sure all instructions should be able to commit if they've + // reached this far. For now leave this in as a check. + if (!rob->isHeadReady()) { + panic("Commit: Unable to commit head instruction!\n"); + return false; + } + + // If it's a branch, then send back branch prediction update info + // to the fetch stage. + // This should be handled in the iew stage if a mispredict happens... + + if (head_inst->isControl()) { + +#if 0 + toIEW->nextPC = head_inst->readPC(); + //Maybe switch over to BTB incorrect. + toIEW->btbMissed = head_inst->btbMiss(); + toIEW->target = head_inst->nextPC; + //Maybe also include global history information. + //This simple version will have no branch prediction however. +#endif + + ++commitCommittedBranches; + } + + // Now that the instruction is going to be committed, finalize its + // trace data. + if (head_inst->traceData) { + head_inst->traceData->finalize(); + } + + //Finally clear the head ROB entry. + rob->retireHead(); + + // Return true to indicate that we have committed an instruction. + return true; +} + +template <class Impl> +void +SimpleCommit<Impl>::getInsts() +{ + ////////////////////////////////////// + // Handle ROB functions + ////////////////////////////////////// + + // Read any issued instructions and place them into the ROB. Do this + // prior to squashing to avoid having instructions in the ROB that + // don't get squashed properly. + int insts_to_process = min((int)renameWidth, fromRename->size); + + for (int inst_num = 0; + inst_num < insts_to_process; + ++inst_num) + { + if (!fromRename->insts[inst_num]->isSquashed()) { + DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", + fromRename->insts[inst_num]->readPC()); + rob->insertInst(fromRename->insts[inst_num]); + } else { + DPRINTF(Commit, "Commit: Instruction %i PC %#x was " + "squashed, skipping.\n", + fromRename->insts[inst_num]->seqNum, + fromRename->insts[inst_num]->readPC()); + } + } +} + +template <class Impl> +void +SimpleCommit<Impl>::markCompletedInsts() +{ + // Grab completed insts out of the IEW instruction queue, and mark + // instructions completed within the ROB. + for (int inst_num = 0; + inst_num < fromIEW->size && fromIEW->insts[inst_num]; + ++inst_num) + { + DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", + fromIEW->insts[inst_num]->readPC(), + fromIEW->insts[inst_num]->seqNum); + + // Mark the instruction as ready to commit. + fromIEW->insts[inst_num]->setCanCommit(); + } +} + +template <class Impl> +uint64_t +SimpleCommit<Impl>::readCommitPC() +{ + return rob->readHeadPC(); +} diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc new file mode 100644 index 000000000..a268dbc23 --- /dev/null +++ b/src/cpu/o3/cpu.cc @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config/full_system.hh" + +#if FULL_SYSTEM +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif +#include "sim/root.hh" + +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/cpu.hh" + +using namespace std; + +BaseFullCPU::BaseFullCPU(Params ¶ms) + : BaseCPU(¶ms), cpu_id(0) +{ +} + +template <class Impl> +FullO3CPU<Impl>::TickEvent::TickEvent(FullO3CPU<Impl> *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::TickEvent::process() +{ + cpu->tick(); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::TickEvent::description() +{ + return "FullO3CPU tick event"; +} + +//Call constructor to all the pipeline stages here +template <class Impl> +FullO3CPU<Impl>::FullO3CPU(Params ¶ms) +#if FULL_SYSTEM + : BaseFullCPU(params), +#else + : BaseFullCPU(params), +#endif // FULL_SYSTEM + tickEvent(this), + fetch(params), + decode(params), + rename(params), + iew(params), + commit(params), + + regFile(params.numPhysIntRegs, params.numPhysFloatRegs), + + freeList(TheISA::NumIntRegs, params.numPhysIntRegs, + TheISA::NumFloatRegs, params.numPhysFloatRegs), + + renameMap(TheISA::NumIntRegs, params.numPhysIntRegs, + TheISA::NumFloatRegs, params.numPhysFloatRegs, + TheISA::NumMiscRegs, + TheISA::ZeroReg, + TheISA::ZeroReg + TheISA::NumIntRegs), + + rob(params.numROBEntries, params.squashWidth), + + // What to pass to these time buffers? + // For now just have these time buffers be pretty big. + timeBuffer(5, 5), + fetchQueue(5, 5), + decodeQueue(5, 5), + renameQueue(5, 5), + iewQueue(5, 5), + + cpuXC(NULL), + + globalSeqNum(1), + +#if FULL_SYSTEM + system(params.system), + memCtrl(system->memctrl), + physmem(system->physmem), + itb(params.itb), + dtb(params.dtb), + mem(params.mem), +#else + // Hardcoded for a single thread!! + mem(params.workload[0]->getMemory()), +#endif // FULL_SYSTEM + + icacheInterface(params.icacheInterface), + dcacheInterface(params.dcacheInterface), + deferRegistration(params.defReg), + numInsts(0), + funcExeInst(0) +{ + _status = Idle; + +#if !FULL_SYSTEM + thread.resize(this->number_of_threads); +#endif + + for (int i = 0; i < this->number_of_threads; ++i) { +#if FULL_SYSTEM + assert(i == 0); + thread[i] = new CPUExecContext(this, 0, system, itb, dtb, mem); + system->execContexts[i] = thread[i]->getProxy(); + + execContexts.push_back(system->execContexts[i]); +#else + if (i < params.workload.size()) { + DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " + "process is %#x", + i, params.workload[i]->prog_entry, thread[i]); + thread[i] = new CPUExecContext(this, i, params.workload[i], i); + } + assert(params.workload[i]->getMemory() != NULL); + assert(mem != NULL); + execContexts.push_back(thread[i]->getProxy()); +#endif // !FULL_SYSTEM + } + + // Note that this is a hack so that my code which still uses xc-> will + // still work. I should remove this eventually + cpuXC = thread[0]; + + // The stages also need their CPU pointer setup. However this must be + // done at the upper level CPU because they have pointers to the upper + // level CPU, and not this FullO3CPU. + + // Give each of the stages the time buffer they will use. + fetch.setTimeBuffer(&timeBuffer); + decode.setTimeBuffer(&timeBuffer); + rename.setTimeBuffer(&timeBuffer); + iew.setTimeBuffer(&timeBuffer); + commit.setTimeBuffer(&timeBuffer); + + // Also setup each of the stages' queues. + fetch.setFetchQueue(&fetchQueue); + decode.setFetchQueue(&fetchQueue); + decode.setDecodeQueue(&decodeQueue); + rename.setDecodeQueue(&decodeQueue); + rename.setRenameQueue(&renameQueue); + iew.setRenameQueue(&renameQueue); + iew.setIEWQueue(&iewQueue); + commit.setIEWQueue(&iewQueue); + commit.setRenameQueue(&renameQueue); + + // Setup the rename map for whichever stages need it. + rename.setRenameMap(&renameMap); + iew.setRenameMap(&renameMap); + + // Setup the free list for whichever stages need it. + rename.setFreeList(&freeList); + renameMap.setFreeList(&freeList); + + // Setup the ROB for whichever stages need it. + commit.setROB(&rob); +} + +template <class Impl> +FullO3CPU<Impl>::~FullO3CPU() +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::fullCPURegStats() +{ + // Register any of the FullCPU's stats here. +} + +template <class Impl> +void +FullO3CPU<Impl>::tick() +{ + DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + + //Tick each of the stages if they're actually running. + //Will want to figure out a way to unschedule itself if they're all + //going to be idle for a long time. + fetch.tick(); + + decode.tick(); + + rename.tick(); + + iew.tick(); + + commit.tick(); + + // Now advance the time buffers, unless the stage is stalled. + timeBuffer.advance(); + + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + + if (_status == Running && !tickEvent.scheduled()) + tickEvent.schedule(curTick + 1); +} + +template <class Impl> +void +FullO3CPU<Impl>::init() +{ + if(!deferRegistration) + { + this->registerExecContexts(); + + // Need to do a copy of the xc->regs into the CPU's regfile so + // that it can start properly. +#if FULL_SYSTEM + ExecContext *src_xc = system->execContexts[0]; + TheISA::initCPU(src_xc, src_xc->readCpuId()); +#else + ExecContext *src_xc = thread[0]->getProxy(); +#endif + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) + { + regFile.intRegFile[i] = src_xc->readIntReg(i); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) + { + regFile.floatRegFile.setRegBits(i, src_xc->readRegBits(i)) + } +/* + // Then loop through the misc registers. + regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr; +*/ + // Then finally set the PC and the next PC. + regFile.pc = src_xc->readPC(); + regFile.npc = src_xc->readNextPC(); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int thread_num, int delay) +{ + // Needs to set each stage to running as well. + + scheduleTickEvent(delay); + + _status = Running; +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int thread_num) +{ + panic("suspendContext unimplemented!"); +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int thread_num) +{ + panic("deallocateContext unimplemented!"); +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int thread_num) +{ + panic("haltContext unimplemented!"); +} + +template <class Impl> +void +FullO3CPU<Impl>::switchOut() +{ + panic("FullO3CPU does not have a switch out function.\n"); +} + +template <class Impl> +void +FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // Set all status's to active, schedule the + // CPU's tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + } + } +} + +template <class Impl> +InstSeqNum +FullO3CPU<Impl>::getAndIncrementInstSeq() +{ + // Hopefully this works right. + return globalSeqNum++; +} + +template <class Impl> +uint64_t +FullO3CPU<Impl>::readIntReg(int reg_idx) +{ + return regFile.readIntReg(reg_idx); +} + +template <class Impl> +FloatReg +FullO3CPU<Impl>::readFloatReg(int reg_idx, int width) +{ + return regFile.readFloatReg(reg_idx, width); +} + +template <class Impl> +FloatReg +FullO3CPU<Impl>::readFloatReg(int reg_idx) +{ + return regFile.readFloatReg(reg_idx); +} + +template <class Impl> +FloatRegBits +FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width) +{ + return regFile.readFloatRegBits(reg_idx, width); +} + +template <class Impl> +FloatRegBits +FullO3CPU<Impl>::readFloatRegBits(int reg_idx) +{ + return regFile.readFloatRegBits(reg_idx); +} + +template <class Impl> +void +FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) +{ + regFile.setIntReg(reg_idx, val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val, int width) +{ + regFile.setFloatReg(reg_idx, val, width); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val) +{ + regFile.setFloatReg(reg_idx, val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, int width) +{ + regFile.setFloatRegBits(reg_idx, val, width); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + regFile.setFloatRegBits(reg_idx, val); +} + +template <class Impl> +uint64_t +FullO3CPU<Impl>::readPC() +{ + return regFile.readPC(); +} + +template <class Impl> +void +FullO3CPU<Impl>::setNextPC(uint64_t val) +{ + regFile.setNextPC(val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setPC(Addr new_PC) +{ + regFile.setPC(new_PC); +} + +template <class Impl> +void +FullO3CPU<Impl>::addInst(DynInstPtr &inst) +{ + instList.push_back(inst); +} + +template <class Impl> +void +FullO3CPU<Impl>::instDone() +{ + // Keep an instruction count. + numInsts++; + + // Check for instruction-count-based events. + comInstEventQueue[0]->serviceEvents(numInsts); +} + +template <class Impl> +void +FullO3CPU<Impl>::removeBackInst(DynInstPtr &inst) +{ + DynInstPtr inst_to_delete; + + // Walk through the instruction list, removing any instructions + // that were inserted after the given instruction, inst. + while (instList.back() != inst) + { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Mark it as squashed. + inst_to_delete->setSquashed(); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) +{ + DynInstPtr inst_to_remove; + + // The front instruction should be the same one being asked to be removed. + assert(instList.front() == inst); + + // Remove the front instruction. + inst_to_remove = inst; + instList.pop_front(); + + DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n", + inst_to_remove, inst_to_remove->readPC()); +} + +template <class Impl> +void +FullO3CPU<Impl>::removeInstsNotInROB() +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr rob_tail = rob.readTailInst(); + + removeBackInst(rob_tail); +} + +template <class Impl> +void +FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num) +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr inst_to_delete; + + while (instList.back()->seqNum > seq_num) { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.back() = NULL; + instList.pop_back(); + + // Mark it as squashed. + inst_to_delete->setSquashed(); + } + +} + +template <class Impl> +void +FullO3CPU<Impl>::removeAllInsts() +{ + instList.clear(); +} + +template <class Impl> +void +FullO3CPU<Impl>::dumpInsts() +{ + int num = 0; + typename list<DynInstPtr>::iterator inst_list_it = instList.begin(); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", + num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, + (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); + inst_list_it++; + ++num; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst) +{ + iew.wakeDependents(inst); +} + +// Forward declaration of FullO3CPU. +template class FullO3CPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh new file mode 100644 index 000000000..f7c80e8a1 --- /dev/null +++ b/src/cpu/o3/cpu.hh @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +//Todo: Add in a lot of the functions that are ISA specific. Also define +//the functions that currently exist within the base cpu class. Define +//everything for the simobject stuff so it can be serialized and +//instantiated, add in debugging statements everywhere. Have CPU schedule +//itself properly. Threads! +// Avoid running stages and advancing queues if idle/stalled. + +#ifndef __CPU_O3_CPU_FULL_CPU_HH__ +#define __CPU_O3_CPU_FULL_CPU_HH__ + +#include <iostream> +#include <list> +#include <vector> + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/cpu_policy.hh" +#include "sim/process.hh" + +class ExecContext; +class FunctionalMemory; +class Process; + +class BaseFullCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. + public: + typedef BaseCPU::Params Params; + +#if FULL_SYSTEM + BaseFullCPU(Params ¶ms); +#else + BaseFullCPU(Params ¶ms); +#endif // FULL_SYSTEM + + protected: + int cpu_id; +}; + +template <class Impl> +class FullO3CPU : public BaseFullCPU +{ + public: + //Put typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + enum Status { + Running, + Idle, + Halted, + Blocked // ? + }; + + Status _status; + + private: + class TickEvent : public Event + { + private: + FullO3CPU<Impl> *cpu; + + public: + TickEvent(FullO3CPU<Impl> *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + public: + FullO3CPU(Params ¶ms); + ~FullO3CPU(); + + void fullCPURegStats(); + + void tick(); + + void init(); + + void activateContext(int thread_num, int delay); + void suspendContext(int thread_num); + void deallocateContext(int thread_num); + void haltContext(int thread_num); + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. */ + InstSeqNum getAndIncrementInstSeq(); + +#if FULL_SYSTEM + /** Check if this address is a valid instruction address. */ + bool validInstAddr(Addr addr) { return true; } + + /** Check if this address is a valid data address. */ + bool validDataAddr(Addr addr) { return true; } + + /** Get instruction asid. */ + int getInstAsid() + { return regFile.miscRegs.getInstAsid(); } + + /** Get data asid. */ + int getDataAsid() + { return regFile.miscRegs.getDataAsid(); } +#else + bool validInstAddr(Addr addr) + { return thread[0]->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return thread[0]->validDataAddr(addr); } + + int getInstAsid() { return thread[0]->getInstAsid(); } + int getDataAsid() { return thread[0]->getDataAsid(); } + +#endif + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx); + + FloatReg readFloatReg(int reg_idx); + + FloatReg readFloatReg(int reg_idx, int width); + + FloatRegBits readFloatRegBits(int reg_idx); + + FloatRegBits readFloatRegBits(int reg_idx, int width); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatRegBits(int reg_idx, FloatRegBits val); + + void setFloatRegBits(int reg_idx, FloatRegBits val); + + uint64_t readPC(); + + void setNextPC(uint64_t val); + + void setPC(Addr new_PC); + + /** Function to add instruction onto the head of the list of the + * instructions. Used when new instructions are fetched. + */ + void addInst(DynInstPtr &inst); + + /** Function to tell the CPU that an instruction has completed. */ + void instDone(); + + /** Remove all instructions in back of the given instruction, but leave + * that instruction in the list. This is useful in a squash, when there + * are instructions in this list that don't exist in structures such as + * the ROB. The instruction doesn't have to be the last instruction in + * the list, but will be once this function completes. + * @todo: Remove only up until that inst? Squashed inst is most likely + * valid. + */ + void removeBackInst(DynInstPtr &inst); + + /** Remove an instruction from the front of the list. It is expected + * that there are no instructions in front of it (that is, none are older + * than the instruction being removed). Used when retiring instructions. + * @todo: Remove the argument to this function, and just have it remove + * last instruction once it's verified that commit has the same ordering + * as the instruction list. + */ + void removeFrontInst(DynInstPtr &inst); + + /** Remove all instructions that are not currently in the ROB. */ + void removeInstsNotInROB(); + + /** Remove all instructions younger than the given sequence number. */ + void removeInstsUntil(const InstSeqNum &seq_num); + + /** Remove all instructions from the list. */ + void removeAllInsts(); + + void dumpInsts(); + + /** Basically a wrapper function so that instructions executed at + * commit can tell the instruction queue that they have completed. + * Eventually this hack should be removed. + */ + void wakeDependents(DynInstPtr &inst); + + public: + /** List of all the instructions in flight. */ + list<DynInstPtr> instList; + + //not sure these should be private. + protected: + /** The fetch stage. */ + typename CPUPolicy::Fetch fetch; + + /** The fetch stage's status. */ + typename CPUPolicy::Fetch::Status fetchStatus; + + /** The decode stage. */ + typename CPUPolicy::Decode decode; + + /** The decode stage's status. */ + typename CPUPolicy::Decode::Status decodeStatus; + + /** The dispatch stage. */ + typename CPUPolicy::Rename rename; + + /** The dispatch stage's status. */ + typename CPUPolicy::Rename::Status renameStatus; + + /** The issue/execute/writeback stages. */ + typename CPUPolicy::IEW iew; + + /** The issue/execute/writeback stage's status. */ + typename CPUPolicy::IEW::Status iewStatus; + + /** The commit stage. */ + typename CPUPolicy::Commit commit; + + /** The fetch stage's status. */ + typename CPUPolicy::Commit::Status commitStatus; + + //Might want to just pass these objects in to the constructors of the + //appropriate stage. regFile is in iew, freeList in dispatch, renameMap + //in dispatch, and the rob in commit. + /** The register file. */ + typename CPUPolicy::RegFile regFile; + + /** The free list. */ + typename CPUPolicy::FreeList freeList; + + /** The rename map. */ + typename CPUPolicy::RenameMap renameMap; + + /** The re-order buffer. */ + typename CPUPolicy::ROB rob; + + public: + /** Typedefs from the Impl to get the structs that each of the + * time buffers should use. + */ + typedef typename CPUPolicy::TimeStruct TimeStruct; + + typedef typename CPUPolicy::FetchStruct FetchStruct; + + typedef typename CPUPolicy::DecodeStruct DecodeStruct; + + typedef typename CPUPolicy::RenameStruct RenameStruct; + + typedef typename CPUPolicy::IEWStruct IEWStruct; + + /** The main time buffer to do backwards communication. */ + TimeBuffer<TimeStruct> timeBuffer; + + /** The fetch stage's instruction queue. */ + TimeBuffer<FetchStruct> fetchQueue; + + /** The decode stage's instruction queue. */ + TimeBuffer<DecodeStruct> decodeQueue; + + /** The rename stage's instruction queue. */ + TimeBuffer<RenameStruct> renameQueue; + + /** The IEW stage's instruction queue. */ + TimeBuffer<IEWStruct> iewQueue; + + public: + /** The temporary exec context to support older accessors. */ + CPUExecContext *cpuXC; + + /** Temporary function to get pointer to exec context. */ + ExecContext *xcBase() + { + return thread[0]->getProxy(); + } + + CPUExecContext *cpuXCBase() + { + return thread[0]; + } + + InstSeqNum globalSeqNum; + +#if FULL_SYSTEM + System *system; + + MemoryController *memCtrl; + PhysicalMemory *physmem; + + AlphaITB *itb; + AlphaDTB *dtb; + +// SWContext *swCtx; +#endif + std::vector<CPUExecContext *> thread; + + FunctionalMemory *mem; + + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + bool deferRegistration; + + Counter numInsts; + + Counter funcExeInst; +}; + +#endif diff --git a/src/cpu/o3/cpu_policy.hh b/src/cpu/o3/cpu_policy.hh new file mode 100644 index 000000000..41f06f81b --- /dev/null +++ b/src/cpu/o3/cpu_policy.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_CPU_POLICY_HH__ +#define __CPU_O3_CPU_CPU_POLICY_HH__ + +#include "cpu/o3/bpred_unit.hh" +#include "cpu/o3/free_list.hh" +#include "cpu/o3/inst_queue.hh" +#include "cpu/o3/ldstq.hh" +#include "cpu/o3/mem_dep_unit.hh" +#include "cpu/o3/regfile.hh" +#include "cpu/o3/rename_map.hh" +#include "cpu/o3/rob.hh" +#include "cpu/o3/store_set.hh" + +#include "cpu/o3/commit.hh" +#include "cpu/o3/decode.hh" +#include "cpu/o3/fetch.hh" +#include "cpu/o3/iew.hh" +#include "cpu/o3/rename.hh" + +#include "cpu/o3/comm.hh" + +template<class Impl> +struct SimpleCPUPolicy +{ + typedef TwobitBPredUnit<Impl> BPredUnit; + typedef PhysRegFile<Impl> RegFile; + typedef SimpleFreeList FreeList; + typedef SimpleRenameMap RenameMap; + typedef ROB<Impl> ROB; + typedef InstructionQueue<Impl> IQ; + typedef MemDepUnit<StoreSet, Impl> MemDepUnit; + typedef LDSTQ<Impl> LDSTQ; + + typedef SimpleFetch<Impl> Fetch; + typedef SimpleDecode<Impl> Decode; + typedef SimpleRename<Impl> Rename; + typedef SimpleIEW<Impl> IEW; + typedef SimpleCommit<Impl> Commit; + + /** The struct for communication between fetch and decode. */ + typedef SimpleFetchSimpleDecode<Impl> FetchStruct; + + /** The struct for communication between decode and rename. */ + typedef SimpleDecodeSimpleRename<Impl> DecodeStruct; + + /** The struct for communication between rename and IEW. */ + typedef SimpleRenameSimpleIEW<Impl> RenameStruct; + + /** The struct for communication between IEW and commit. */ + typedef SimpleIEWSimpleCommit<Impl> IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct<Impl> IssueStruct; + + /** The struct for all backwards communication. */ + typedef TimeBufStruct TimeStruct; + +}; + +#endif //__CPU_O3_CPU_CPU_POLICY_HH__ diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc new file mode 100644 index 000000000..290648318 --- /dev/null +++ b/src/cpu/o3/decode.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/decode_impl.hh" + +template class SimpleDecode<AlphaSimpleImpl>; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh new file mode 100644 index 000000000..5b9a0f822 --- /dev/null +++ b/src/cpu/o3/decode.hh @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_SIMPLE_DECODE_HH__ +#define __CPU_O3_CPU_SIMPLE_DECODE_HH__ + +#include <queue> + +#include "base/statistics.hh" +#include "base/timebuf.hh" + +template<class Impl> +class SimpleDecode +{ + private: + // Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + // Typedefs from the CPU policy. + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + public: + // The only time decode will become blocked is if dispatch becomes + // blocked, which means IQ or ROB is probably full. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking + }; + + private: + // May eventually need statuses on a per thread basis. + Status _status; + + public: + SimpleDecode(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr); + + void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); + + void tick(); + + void decode(); + + private: + inline bool fetchInstsValid(); + + void block(); + + inline void unblock(); + + void squash(DynInstPtr &inst); + + public: + // Might want to make squash a friend function. + void squash(); + + private: + // Interfaces to objects outside of decode. + /** CPU interface. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get rename's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + // Might not be the best name as not only fetch will read it. + typename TimeBuffer<TimeStruct>::wire toFetch; + + /** Decode instruction queue. */ + TimeBuffer<DecodeStruct> *decodeQueue; + + /** Wire used to write any information heading to rename. */ + typename TimeBuffer<DecodeStruct>::wire toRename; + + /** Fetch instruction queue interface. */ + TimeBuffer<FetchStruct> *fetchQueue; + + /** Wire to get fetch's output from fetch queue. */ + typename TimeBuffer<FetchStruct>::wire fromFetch; + + /** Skid buffer between fetch and decode. */ + std::queue<FetchStruct> skidBuffer; + + //Consider making these unsigned to avoid any confusion. + /** Rename to decode delay, in ticks. */ + unsigned renameToDecodeDelay; + + /** IEW to decode delay, in ticks. */ + unsigned iewToDecodeDelay; + + /** Commit to decode delay, in ticks. */ + unsigned commitToDecodeDelay; + + /** Fetch to decode delay, in ticks. */ + unsigned fetchToDecodeDelay; + + /** The width of decode, in instructions. */ + unsigned decodeWidth; + + /** The instruction that decode is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; + + Stats::Scalar<> decodeIdleCycles; + Stats::Scalar<> decodeBlockedCycles; + Stats::Scalar<> decodeUnblockCycles; + Stats::Scalar<> decodeSquashCycles; + Stats::Scalar<> decodeBranchMispred; + Stats::Scalar<> decodeControlMispred; + Stats::Scalar<> decodeDecodedInsts; + Stats::Scalar<> decodeSquashedInsts; +}; + +#endif // __CPU_O3_CPU_SIMPLE_DECODE_HH__ diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh new file mode 100644 index 000000000..463f0ddac --- /dev/null +++ b/src/cpu/o3/decode_impl.hh @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/decode.hh" + +template<class Impl> +SimpleDecode<Impl>::SimpleDecode(Params ¶ms) + : renameToDecodeDelay(params.renameToDecodeDelay), + iewToDecodeDelay(params.iewToDecodeDelay), + commitToDecodeDelay(params.commitToDecodeDelay), + fetchToDecodeDelay(params.fetchToDecodeDelay), + decodeWidth(params.decodeWidth), + numInst(0) +{ + DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); + _status = Idle; +} + +template <class Impl> +void +SimpleDecode<Impl>::regStats() +{ + decodeIdleCycles + .name(name() + ".decodeIdleCycles") + .desc("Number of cycles decode is idle") + .prereq(decodeIdleCycles); + decodeBlockedCycles + .name(name() + ".decodeBlockedCycles") + .desc("Number of cycles decode is blocked") + .prereq(decodeBlockedCycles); + decodeUnblockCycles + .name(name() + ".decodeUnblockCycles") + .desc("Number of cycles decode is unblocking") + .prereq(decodeUnblockCycles); + decodeSquashCycles + .name(name() + ".decodeSquashCycles") + .desc("Number of cycles decode is squashing") + .prereq(decodeSquashCycles); + decodeBranchMispred + .name(name() + ".decodeBranchMispred") + .desc("Number of times decode detected a branch misprediction") + .prereq(decodeBranchMispred); + decodeControlMispred + .name(name() + ".decodeControlMispred") + .desc("Number of times decode detected an instruction incorrectly" + " predicted as a control") + .prereq(decodeControlMispred); + decodeDecodedInsts + .name(name() + ".decodeDecodedInsts") + .desc("Number of instructions handled by decode") + .prereq(decodeDecodedInsts); + decodeSquashedInsts + .name(name() + ".decodeSquashedInsts") + .desc("Number of squashed instructions handled by decode") + .prereq(decodeSquashedInsts); +} + +template<class Impl> +void +SimpleDecode<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Decode, "Decode: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template<class Impl> +void +SimpleDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Decode, "Decode: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. + toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. + fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +template<class Impl> +void +SimpleDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr) +{ + DPRINTF(Decode, "Decode: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +template<class Impl> +void +SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) +{ + DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. + fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + +template<class Impl> +inline bool +SimpleDecode<Impl>::fetchInstsValid() +{ + return fromFetch->size > 0; +} + +template<class Impl> +void +SimpleDecode<Impl>::block() +{ + DPRINTF(Decode, "Decode: Blocking.\n"); + + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template<class Impl> +inline void +SimpleDecode<Impl>::unblock() +{ + DPRINTF(Decode, "Decode: Unblocking, going to remove " + "instructions from skid buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Finished unblocking.\n"); + _status = Running; + } +} + +// This squash is specifically for when Decode detects a PC-relative branch +// was predicted incorrectly. +template<class Impl> +void +SimpleDecode<Impl>::squash(DynInstPtr &inst) +{ + DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " + "detected at decode.\n"); + Addr new_PC = inst->readNextPC(); + + toFetch->decodeInfo.branchMispredict = true; + toFetch->decodeInfo.doneSeqNum = inst->seqNum; + toFetch->decodeInfo.predIncorrect = true; + toFetch->decodeInfo.squash = true; + toFetch->decodeInfo.nextPC = new_PC; + toFetch->decodeInfo.branchTaken = true; + + // Set status to squashing. + _status = Squashing; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) { + skidBuffer.pop(); + } + + // Squash instructions up until this one + // Slightly unrealistic! + cpu->removeInstsUntil(inst->seqNum); +} + +template<class Impl> +void +SimpleDecode<Impl>::squash() +{ + DPRINTF(Decode, "Decode: Squashing.\n"); + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template<class Impl> +void +SimpleDecode<Impl>::tick() +{ + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + if (_status != Blocked && _status != Squashing) { + DPRINTF(Decode, "Decode: Not blocked, so attempting to run " + "stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + decode(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + ++decodeUnblockCycles; + + if (fetchInstsValid()) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + } + + unblock(); + } + } else if (_status == Blocked) { + ++decodeBlockedCycles; + + if (fetchInstsValid()) { + block(); + } + + if (!fromRename->renameInfo.stall && + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { + DPRINTF(Decode, "Decode: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this + // stage is done unblocking. + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Still blocked.\n"); + toFetch->decodeInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + } else if (_status == Squashing) { + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + _status = Running; + } else if (fromCommit->commitInfo.squash) { + ++decodeSquashCycles; + + squash(); + } + } +} + +template<class Impl> +void +SimpleDecode<Impl>::decode() +{ + // Check time buffer if being told to squash. + if (fromCommit->commitInfo.squash) { + squash(); + return; + } + + // Check time buffer if being told to stall. + if (fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + block(); + return; + } + + // Check fetch queue to see if instructions are available. + // If no available instructions, do nothing, unless this stage is + // currently unblocking. + if (!fetchInstsValid() && _status != Unblocking) { + DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); + // Should I change the status to idle? + ++decodeIdleCycles; + return; + } + + // Might be better to use a base DynInst * instead? + DynInstPtr inst; + + unsigned to_rename_index = 0; + + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromFetch->size; + + // Debug block... +#if 0 + if (insts_available) { + DPRINTF(Decode, "Decode: Instructions available.\n"); + } else { + if (_status == Unblocking && skidBuffer.empty()) { + DPRINTF(Decode, "Decode: No instructions available, skid buffer " + "empty.\n"); + } else if (_status != Unblocking && + !fromFetch->insts[0]) { + DPRINTF(Decode, "Decode: No instructions available, fetch queue " + "empty.\n"); + } else { + panic("Decode: No instructions available, unexpected condition!" + "\n"); + } + } +#endif + + while (insts_available > 0) + { + DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); + + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromFetch->insts[numInst]; + + DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", + inst->seqNum, inst->readPC()); + + if (inst->isSquashed()) { + DPRINTF(Decode, "Decode: Instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + ++decodeSquashedInsts; + + ++numInst; + --insts_available; + + continue; + } + + + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. + // Isn't this handled by the inst queue? + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + + // This current instruction is valid, so add it into the decode + // queue. The next instruction may not be valid, so check to + // see if branches were predicted correctly. + toRename->insts[to_rename_index] = inst; + + ++(toRename->size); + + // Ensure that if it was predicted as a branch, it really is a + // branch. + if (inst->predTaken() && !inst->isControl()) { + panic("Instruction predicted as a branch!"); + + ++decodeControlMispred; + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + + // Go ahead and compute any PC-relative branches. + + if (inst->isDirectCtrl() && inst->isUncondCtrl()) { + + inst->setNextPC(inst->branchTarget()); + + if (inst->mispredicted()) { + ++decodeBranchMispred; + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + } + + // Normally can check if a direct branch has the right target + // addr (either the immediate, or the branch PC + 4) and redirect + // fetch if it's incorrect. + + // Increment which instruction we're looking at. + ++numInst; + ++to_rename_index; + ++decodeDecodedInsts; + + --insts_available; + } + + numInst = 0; +} diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc new file mode 100644 index 000000000..8ad5e6565 --- /dev/null +++ b/src/cpu/o3/fetch.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/fetch_impl.hh" + +template class SimpleFetch<AlphaSimpleImpl>; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh new file mode 100644 index 000000000..cc64800d9 --- /dev/null +++ b/src/cpu/o3/fetch.hh @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: SMT fetch, +// Add a way to get a stage's current status. + +#ifndef __CPU_O3_CPU_SIMPLE_FETCH_HH__ +#define __CPU_O3_CPU_SIMPLE_FETCH_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/pc_event.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +/** + * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch + * will stall if there's an Icache miss, but otherwise assumes a one cycle + * Icache hit. + */ + +template <class Impl> +class SimpleFetch +{ + public: + /** Typedefs from Impl. */ + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::BPredUnit BPredUnit; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + /** Typedefs from ISA. */ + typedef TheISA::MachInst MachInst; + + public: + enum Status { + Running, + Idle, + Squashing, + Blocked, + IcacheMissStall, + IcacheMissComplete + }; + + // May eventually need statuses on a per thread basis. + Status _status; + + bool stalled; + + public: + class CacheCompletionEvent : public Event + { + private: + SimpleFetch *fetch; + + public: + CacheCompletionEvent(SimpleFetch *_fetch); + + virtual void process(); + virtual const char *description(); + }; + + public: + /** SimpleFetch constructor. */ + SimpleFetch(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); + + void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); + + void processCacheCompletion(); + + private: + /** + * Looks up in the branch predictor to see if the next PC should be + * either next PC+=MachInst or a branch target. + * @param next_PC Next PC variable passed in by reference. It is + * expected to be set to the current PC; it will be updated with what + * the next PC will be. + * @return Whether or not a branch was predicted as taken. + */ + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + + /** + * Fetches the cache line that contains fetch_PC. Returns any + * fault that happened. Puts the data into the class variable + * cacheData. + * @param fetch_PC The PC address that is being fetched from. + * @return Any fault that occured. + */ + Fault fetchCacheLine(Addr fetch_PC); + + inline void doSquash(const Addr &new_PC); + + void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); + + public: + // Figure out PC vs next PC and how it should be updated + void squash(const Addr &new_PC); + + void tick(); + + void fetch(); + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + private: + /** Pointer to the FullCPU. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get decode's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromDecode; + + /** Wire to get rename's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Internal fetch instruction queue. */ + TimeBuffer<FetchStruct> *fetchQueue; + + //Might be annoying how this name is different than the queue. + /** Wire used to write any information heading to decode. */ + typename TimeBuffer<FetchStruct>::wire toDecode; + + /** Icache interface. */ + MemInterface *icacheInterface; + + /** BPredUnit. */ + BPredUnit branchPred; + + /** Memory request used to access cache. */ + MemReqPtr memReq; + + /** Decode to fetch delay, in ticks. */ + unsigned decodeToFetchDelay; + + /** Rename to fetch delay, in ticks. */ + unsigned renameToFetchDelay; + + /** IEW to fetch delay, in ticks. */ + unsigned iewToFetchDelay; + + /** Commit to fetch delay, in ticks. */ + unsigned commitToFetchDelay; + + /** The width of fetch in instructions. */ + unsigned fetchWidth; + + /** Cache block size. */ + int cacheBlkSize; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + /** Size of instructions. */ + int instSize; + + /** Icache stall statistics. */ + Counter lastIcacheStall; + + Stats::Scalar<> icacheStallCycles; + Stats::Scalar<> fetchedInsts; + Stats::Scalar<> predictedBranches; + Stats::Scalar<> fetchCycles; + Stats::Scalar<> fetchSquashCycles; + Stats::Scalar<> fetchBlockedCycles; + Stats::Scalar<> fetchedCacheLines; + + Stats::Distribution<> fetch_nisn_dist; +}; + +#endif //__CPU_O3_CPU_SIMPLE_FETCH_HH__ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh new file mode 100644 index 000000000..8029fc732 --- /dev/null +++ b/src/cpu/o3/fetch_impl.hh @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Remove this later; used only for debugging. +#define OPCODE(X) (X >> 26) & 0x3f + +#include "arch/isa_traits.hh" +#include "sim/byteswap.hh" +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "cpu/o3/fetch.hh" + +#include "sim/root.hh" + +template<class Impl> +SimpleFetch<Impl>::CacheCompletionEvent +::CacheCompletionEvent(SimpleFetch *_fetch) + : Event(&mainEventQueue), + fetch(_fetch) +{ +} + +template<class Impl> +void +SimpleFetch<Impl>::CacheCompletionEvent::process() +{ + fetch->processCacheCompletion(); +} + +template<class Impl> +const char * +SimpleFetch<Impl>::CacheCompletionEvent::description() +{ + return "SimpleFetch cache completion event"; +} + +template<class Impl> +SimpleFetch<Impl>::SimpleFetch(Params ¶ms) + : icacheInterface(params.icacheInterface), + branchPred(params), + decodeToFetchDelay(params.decodeToFetchDelay), + renameToFetchDelay(params.renameToFetchDelay), + iewToFetchDelay(params.iewToFetchDelay), + commitToFetchDelay(params.commitToFetchDelay), + fetchWidth(params.fetchWidth) +{ + DPRINTF(Fetch, "Fetch: Fetch constructor called\n"); + + // Set status to idle. + _status = Idle; + + // Create a new memory request. + memReq = new MemReq(); + // Not sure of this parameter. I think it should be based on the + // thread number. +#if !FULL_SYSTEM + memReq->asid = 0; +#else + memReq->asid = 0; +#endif // FULL_SYSTEM + memReq->data = new uint8_t[64]; + + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Get the size of an instruction. + instSize = sizeof(MachInst); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; +} + +template <class Impl> +void +SimpleFetch<Impl>::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetch_nisn_dist + .init(/* base value */ 0, + /* last value */ fetchWidth, + /* bucket size */ 1) + .name(name() + ".FETCH:rate_dist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf) + ; + + branchPred.regStats(); +} + +template<class Impl> +void +SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); + cpu = cpu_ptr; + // This line will be removed eventually. + memReq->xc = cpu->xcBase(); +} + +template<class Impl> +void +SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) +{ + DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template<class Impl> +void +SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toDecode = fetchQueue->getWire(0); +} + +template<class Impl> +void +SimpleFetch<Impl>::processCacheCompletion() +{ + DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); + + // Only change the status if it's still waiting on the icache access + // to return. + // Can keep track of how many cache accesses go unused due to + // misspeculation here. + if (_status == IcacheMissStall) + _status = IcacheMissComplete; +} + +template <class Impl> +bool +SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +{ + // Do branch prediction check here. + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. + bool predict_taken; + + if (!inst->isControl()) { + next_PC = next_PC + instSize; + inst->setPredTarg(next_PC); + return false; + } + + predict_taken = branchPred.predict(inst, next_PC); + + if (predict_taken) { + ++predictedBranches; + } + + return predict_taken; +} + +template <class Impl> +Fault +SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC) +{ + // Check if the instruction exists within the cache. + // If it does, then proceed on to read the instruction and the rest + // of the instructions in the cache line until either the end of the + // cache line or a predicted taken branch is encountered. + +#if FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + + Fault fault = NoFault; + + // Align the fetch PC so it's at the start of a cache block. + fetch_PC = icacheBlockAlignPC(fetch_PC); + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq->cmd = Read; + memReq->reset(fetch_PC, cacheBlkSize, flags); + + // Translate the instruction request. + // Should this function be + // in the CPU class ? Probably...ITB/DTB should exist within the + // CPU. + + fault = cpu->translateInstReq(memReq); + + // In the case of faults, the fetch stage may need to stall and wait + // on what caused the fetch (ITB or Icache miss). + + // If translation was successful, attempt to read the first + // instruction. + if (fault == NoFault) { + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); + fault = cpu->mem->read(memReq, cacheData); + // This read may change when the mem interface changes. + + fetchedCacheLines++; + } + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == NoFault) { + DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); + memReq->completionEvent = NULL; + + memReq->time = curTick; + + MemAccessResult result = icacheInterface->access(memReq); + + // If the cache missed (in this model functional and timing + // memories are different), then schedule an event to wake + // up this stage once the cache miss completes. + if (result != MA_HIT && icacheInterface->doEvents()) { + memReq->completionEvent = new CacheCompletionEvent(this); + + // How does current model work as far as individual + // stages scheduling/unscheduling? + // Perhaps have only the main CPU scheduled/unscheduled, + // and have it choose what stages to run appropriately. + + DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); + _status = IcacheMissStall; + } + } + + return fault; +} + +template <class Impl> +inline void +SimpleFetch<Impl>::doSquash(const Addr &new_PC) +{ + DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); + + cpu->setNextPC(new_PC + instSize); + cpu->setPC(new_PC); + + // Clear the icache miss if it's outstanding. + if (_status == IcacheMissStall && icacheInterface) { + DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n"); + // @todo: Use an actual thread number here. + icacheInterface->squash(0); + } + + _status = Squashing; + + ++fetchSquashCycles; +} + +template<class Impl> +void +SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC, + const InstSeqNum &seq_num) +{ + DPRINTF(Fetch, "Fetch: Squashing from decode.\n"); + + doSquash(new_PC); + + // Tell the CPU to remove any instructions that are in flight between + // fetch and decode. + cpu->removeInstsUntil(seq_num); +} + +template <class Impl> +void +SimpleFetch<Impl>::squash(const Addr &new_PC) +{ + DPRINTF(Fetch, "Fetch: Squash from commit.\n"); + + doSquash(new_PC); + + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(); +} + +template<class Impl> +void +SimpleFetch<Impl>::tick() +{ + // Check squash signals from commit. + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + + // In any case, squash. + squash(fromCommit->commitInfo.nextPC); + + // Also check if there's a mispredict that happened. + if (fromCommit->commitInfo.branchMispredict) { + branchPred.squash(fromCommit->commitInfo.doneSeqNum, + fromCommit->commitInfo.nextPC, + fromCommit->commitInfo.branchTaken); + } else { + branchPred.squash(fromCommit->commitInfo.doneSeqNum); + } + + return; + } else if (fromCommit->commitInfo.doneSeqNum) { + // Update the branch predictor if it wasn't a squashed instruction + // that was braodcasted. + branchPred.update(fromCommit->commitInfo.doneSeqNum); + } + + // Check ROB squash signals from commit. + if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); + + // Continue to squash. + _status = Squashing; + + ++fetchSquashCycles; + return; + } + + // Check squash signals from decode. + if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + + // Update the branch predictor. + if (fromDecode->decodeInfo.branchMispredict) { + branchPred.squash(fromDecode->decodeInfo.doneSeqNum, + fromDecode->decodeInfo.nextPC, + fromDecode->decodeInfo.branchTaken); + } else { + branchPred.squash(fromDecode->decodeInfo.doneSeqNum); + } + + if (_status != Squashing) { + // Squash unless we're already squashing? + squashFromDecode(fromDecode->decodeInfo.nextPC, + fromDecode->decodeInfo.doneSeqNum); + return; + } + } + + // Check if any of the stall signals are high. + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + // Block stage, regardless of current status. + + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + + _status = Blocked; + + ++fetchBlockedCycles; + return; + } else if (_status == Blocked) { + // Unblock stage if status is currently blocked and none of the + // stall signals are being held high. + _status = Running; + + ++fetchBlockedCycles; + return; + } + + // If fetch has reached this point, then there are no squash signals + // still being held high. Check if fetch is in the squashing state; + // if so, fetch can switch to running. + // Similarly, there are no blocked signals still being held high. + // Check if fetch is in the blocked state; if so, fetch can switch to + // running. + if (_status == Squashing) { + DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n"); + + // Switch status to running + _status = Running; + + ++fetchCycles; + + fetch(); + } else if (_status != IcacheMissStall) { + DPRINTF(Fetch, "Fetch: Running stage.\n"); + + ++fetchCycles; + + fetch(); + } +} + +template<class Impl> +void +SimpleFetch<Impl>::fetch() +{ + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// + + // The current PC. + Addr fetch_PC = cpu->readPC(); + + // Fault code for memory access. + Fault fault = NoFault; + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. + if (_status == IcacheMissComplete) { + DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); + + // Reset the completion event to NULL. + memReq->completionEvent = NULL; + + _status = Running; + } else { + DPRINTF(Fetch, "Fetch: Attempting to translate and read " + "instruction, starting at PC %08p.\n", + fetch_PC); + + fault = fetchCacheLine(fetch_PC); + } + + // If we had a stall due to an icache miss, then return. It'd + // be nicer if this were handled through the kind of fault that + // is returned by the function. + if (_status == IcacheMissStall) { + return; + } + + // As far as timing goes, the CPU will need to send an event through + // the MemReq in order to be woken up once the memory access completes. + // Probably have a status on a per thread basis so each thread can + // block independently and be woken up independently. + + Addr next_PC = fetch_PC; + InstSeqNum inst_seq; + MachInst inst; + unsigned offset = fetch_PC & cacheBlkMask; + unsigned fetched; + + if (fault == NoFault) { + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + + DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + + ////////////////////////// + // Fetch first instruction + ////////////////////////// + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predicted_branch = false; + + for (fetched = 0; + offset < cacheBlkSize && + fetched < fetchWidth && + !predicted_branch; + ++fetched) + { + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - instSize); + + // Get the instruction from the array of the cache line. + inst = gtoh(*reinterpret_cast<MachInst *> + (&cacheData[offset])); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, + inst_seq, cpu); + + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + inst_seq, instruction->readPC()); + + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + + // Add instruction to the CPU's list of instructions. + cpu->addInst(instruction); + + // Write the instruction to the first slot in the queue + // that heads to decode. + toDecode->insts[fetched] = instruction; + + toDecode->size++; + + // Increment stat of fetched instructions. + ++fetchedInsts; + + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + + offset+= instSize; + } + + fetch_nisn_dist.sample(fetched); + } + + // Now that fetching is completed, update the PC to signify what the next + // cycle will be. Might want to move this to the beginning of this + // function so that the PC updates at the beginning of everything. + // Or might want to leave setting the PC to the main CPU, with fetch + // only changing the nextPC (will require correct determination of + // next PC). + if (fault == NoFault) { + DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); + cpu->setPC(next_PC); + cpu->setNextPC(next_PC + instSize); + } else { + // If the issue was an icache miss, then we can just return and + // wait until it is handled. + if (_status == IcacheMissStall) { + return; + } + + // Handle the fault. + // This stage will not be able to continue until all the ROB + // slots are empty, at which point the fault can be handled. + // The only other way it can wake up is if a squash comes along + // and changes the PC. Not sure how to handle that case...perhaps + // have it handled by the upper level CPU class which peeks into the + // time buffer and sees if a squash comes along, in which case it + // changes the status. + + DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); + + _status = Blocked; +#if FULL_SYSTEM +// cpu->trap(fault); + // Send a signal to the ROB indicating that there's a trap from the + // fetch stage that needs to be handled. Need to indicate that + // there's a fault, and the fault type. +#else // !FULL_SYSTEM + fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); +#endif // FULL_SYSTEM + } +} diff --git a/src/cpu/o3/free_list.cc b/src/cpu/o3/free_list.cc new file mode 100644 index 000000000..6f0b4be1e --- /dev/null +++ b/src/cpu/o3/free_list.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" + +#include "cpu/o3/free_list.hh" + +SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) +{ + DPRINTF(FreeList, "FreeList: Creating new free list object.\n"); + + // DEBUG stuff. + freeIntRegsScoreboard.resize(numPhysicalIntRegs); + + freeFloatRegsScoreboard.resize(numPhysicalRegs); + + for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) { + freeIntRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. + for (PhysRegIndex i = numLogicalIntRegs; + i < numPhysicalIntRegs; ++i) + { + freeIntRegs.push(i); + + freeIntRegsScoreboard[i] = 1; + } + + for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs; + ++i) + { + freeFloatRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. Because the + // float registers' indices start where the physical registers end, + // some math must be done to determine where the free registers start. + for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; + i < numPhysicalRegs; ++i) + { + freeFloatRegs.push(i); + + freeFloatRegsScoreboard[i] = 1; + } +} + diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh new file mode 100644 index 000000000..0b85dba1e --- /dev/null +++ b/src/cpu/o3/free_list.hh @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_FREE_LIST_HH__ +#define __CPU_O3_CPU_FREE_LIST_HH__ + +#include <iostream> +#include <queue> + +#include "arch/isa_traits.hh" +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/comm.hh" + +/** + * FreeList class that simply holds the list of free integer and floating + * point registers. Can request for a free register of either type, and + * also send back free registers of either type. This is a very simple + * class, but it should be sufficient for most implementations. Like all + * other classes, it assumes that the indices for the floating point + * registers starts after the integer registers end. Hence the variable + * numPhysicalIntRegs is logically equivalent to the baseFP dependency. + * Note that + * while this most likely should be called FreeList, the name "FreeList" + * is used in a typedef within the CPU Policy, and therefore no class + * can be named simply "FreeList". + * @todo: Give a better name to the base FP dependency. + */ +class SimpleFreeList +{ + private: + /** The list of free integer registers. */ + std::queue<PhysRegIndex> freeIntRegs; + + /** The list of free floating point registers. */ + std::queue<PhysRegIndex> freeFloatRegs; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Total number of physical registers. */ + int numPhysicalRegs; + + /** DEBUG stuff below. */ + std::vector<int> freeIntRegsScoreboard; + + std::vector<bool> freeFloatRegsScoreboard; + + public: + SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs); + + inline PhysRegIndex getIntReg(); + + inline PhysRegIndex getFloatReg(); + + inline void addReg(PhysRegIndex freed_reg); + + inline void addIntReg(PhysRegIndex freed_reg); + + inline void addFloatReg(PhysRegIndex freed_reg); + + bool hasFreeIntRegs() + { return !freeIntRegs.empty(); } + + bool hasFreeFloatRegs() + { return !freeFloatRegs.empty(); } + + int numFreeIntRegs() + { return freeIntRegs.size(); } + + int numFreeFloatRegs() + { return freeFloatRegs.size(); } +}; + +inline PhysRegIndex +SimpleFreeList::getIntReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free integer register.\n"); + if (freeIntRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeIntRegs.front(); + + freeIntRegs.pop(); + + // DEBUG + assert(freeIntRegsScoreboard[free_reg]); + freeIntRegsScoreboard[free_reg] = 0; + + return(free_reg); +} + +inline PhysRegIndex +SimpleFreeList::getFloatReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free float register.\n"); + if (freeFloatRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeFloatRegs.front(); + + freeFloatRegs.pop(); + + // DEBUG + assert(freeFloatRegsScoreboard[free_reg]); + freeFloatRegsScoreboard[free_reg] = 0; + + return(free_reg); +} + +inline void +SimpleFreeList::addReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg); + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + if (freed_reg < numPhysicalIntRegs) { + freeIntRegs.push(freed_reg); + + // DEBUG + assert(freeIntRegsScoreboard[freed_reg] == false); + freeIntRegsScoreboard[freed_reg] = 1; + } else if (freed_reg < numPhysicalRegs) { + freeFloatRegs.push(freed_reg); + + // DEBUG + assert(freeFloatRegsScoreboard[freed_reg] == false); + freeFloatRegsScoreboard[freed_reg] = 1; + } +} + +inline void +SimpleFreeList::addIntReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + + // DEBUG + assert(!freeIntRegsScoreboard[freed_reg]); + freeIntRegsScoreboard[freed_reg] = 1; + + freeIntRegs.push(freed_reg); +} + +inline void +SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + + // DEBUG + assert(!freeFloatRegsScoreboard[freed_reg]); + freeFloatRegsScoreboard[freed_reg] = 1; + + freeFloatRegs.push(freed_reg); +} + +#endif // __CPU_O3_CPU_FREE_LIST_HH__ diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc new file mode 100644 index 000000000..45b5610e7 --- /dev/null +++ b/src/cpu/o3/iew.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/iew_impl.hh" +#include "cpu/o3/inst_queue.hh" + +template class SimpleIEW<AlphaSimpleImpl>; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh new file mode 100644 index 000000000..1e370d4e6 --- /dev/null +++ b/src/cpu/o3/iew.hh @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +//Todo: Update with statuses. +//Need to handle delaying writes to the writeback bus if it's full at the +//given time. + +#ifndef __CPU_O3_CPU_SIMPLE_IEW_HH__ +#define __CPU_O3_CPU_SIMPLE_IEW_HH__ + +#include <queue> + +#include "config/full_system.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/o3/comm.hh" + +template<class Impl> +class SimpleIEW +{ + private: + //Typedefs from Impl + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::IQ IQ; + typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::LDSTQ LDSTQ; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::IssueStruct IssueStruct; + + friend class Impl::FullCPU; + public: + enum Status { + Running, + Blocked, + Idle, + Squashing, + Unblocking + }; + + private: + Status _status; + Status _issueStatus; + Status _exeStatus; + Status _wbStatus; + + public: + class WritebackEvent : public Event { + private: + DynInstPtr inst; + SimpleIEW<Impl> *iewStage; + + public: + WritebackEvent(DynInstPtr &_inst, SimpleIEW<Impl> *_iew); + + virtual void process(); + virtual const char *description(); + }; + + public: + SimpleIEW(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void squash(); + + void squashDueToBranch(DynInstPtr &inst); + + void squashDueToMem(DynInstPtr &inst); + + void block(); + + inline void unblock(); + + void wakeDependents(DynInstPtr &inst); + + void instToCommit(DynInstPtr &inst); + + private: + void dispatchInsts(); + + void executeInsts(); + + public: + void tick(); + + void iew(); + + //Interfaces to objects inside and outside of IEW. + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer<TimeStruct>::wire toRename; + + /** Rename instruction queue interface. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to get rename's output from rename queue. */ + typename TimeBuffer<RenameStruct>::wire fromRename; + + /** Issue stage queue. */ + TimeBuffer<IssueStruct> issueToExecQueue; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer<IssueStruct>::wire fromIssue; + + /** + * IEW stage time buffer. Holds ROB indices of instructions that + * can be marked as completed. + */ + TimeBuffer<IEWStruct> *iewQueue; + + /** Wire to write infromation heading to commit. */ + typename TimeBuffer<IEWStruct>::wire toCommit; + + //Will need internal queue to hold onto instructions coming from + //the rename stage in case of a stall. + /** Skid buffer between rename and IEW. */ + std::queue<RenameStruct> skidBuffer; + + protected: + /** Instruction queue. */ + IQ instQueue; + + LDSTQ ldstQueue; + +#if !FULL_SYSTEM + public: + void lsqWriteback(); +#endif + + private: + /** Pointer to rename map. Might not want this stage to directly + * access this though... + */ + RenameMap *renameMap; + + /** CPU interface. */ + FullCPU *cpu; + + private: + /** Commit to IEW delay, in ticks. */ + unsigned commitToIEWDelay; + + /** Rename to IEW delay, in ticks. */ + unsigned renameToIEWDelay; + + /** + * Issue to execute delay, in ticks. What this actually represents is + * the amount of time it takes for an instruction to wake up, be + * scheduled, and sent to a FU for execution. + */ + unsigned issueToExecuteDelay; + + /** Width of issue's read path, in instructions. The read path is both + * the skid buffer and the rename instruction queue. + * Note to self: is this really different than issueWidth? + */ + unsigned issueReadWidth; + + /** Width of issue, in instructions. */ + unsigned issueWidth; + + /** Width of execute, in instructions. Might make more sense to break + * down into FP vs int. + */ + unsigned executeWidth; + + /** Number of cycles stage has been squashing. Used so that the stage + * knows when it can start unblocking, which is when the previous stage + * has received the stall signal and clears up its outputs. + */ + unsigned cyclesSquashing; + + Stats::Scalar<> iewIdleCycles; + Stats::Scalar<> iewSquashCycles; + Stats::Scalar<> iewBlockCycles; + Stats::Scalar<> iewUnblockCycles; +// Stats::Scalar<> iewWBInsts; + Stats::Scalar<> iewDispatchedInsts; + Stats::Scalar<> iewDispSquashedInsts; + Stats::Scalar<> iewDispLoadInsts; + Stats::Scalar<> iewDispStoreInsts; + Stats::Scalar<> iewDispNonSpecInsts; + Stats::Scalar<> iewIQFullEvents; + Stats::Scalar<> iewExecutedInsts; + Stats::Scalar<> iewExecLoadInsts; + Stats::Scalar<> iewExecStoreInsts; + Stats::Scalar<> iewExecSquashedInsts; + Stats::Scalar<> memOrderViolationEvents; + Stats::Scalar<> predictedTakenIncorrect; +}; + +#endif // __CPU_O3_CPU_IEW_HH__ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh new file mode 100644 index 000000000..85217dd10 --- /dev/null +++ b/src/cpu/o3/iew_impl.hh @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// @todo: Fix the instantaneous communication among all the stages within +// iew. There's a clear delay between issue and execute, yet backwards +// communication happens simultaneously. +// Update the statuses for each stage. + +#include <queue> + +#include "base/timebuf.hh" +#include "cpu/o3/iew.hh" + +template<class Impl> +SimpleIEW<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, + SimpleIEW<Impl> *_iew) + : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew) +{ + this->setFlags(Event::AutoDelete); +} + +template<class Impl> +void +SimpleIEW<Impl>::WritebackEvent::process() +{ + DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n"); + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); + // Need to execute second half of the instruction, do actual writing to + // registers and such + inst->execute(); +} + +template<class Impl> +const char * +SimpleIEW<Impl>::WritebackEvent::description() +{ + return "LSQ writeback event"; +} + +template<class Impl> +SimpleIEW<Impl>::SimpleIEW(Params ¶ms) + : // Just make this time buffer really big for now + issueToExecQueue(5, 5), + instQueue(params), + ldstQueue(params), + commitToIEWDelay(params.commitToIEWDelay), + renameToIEWDelay(params.renameToIEWDelay), + issueToExecuteDelay(params.issueToExecuteDelay), + issueReadWidth(params.issueWidth), + issueWidth(params.issueWidth), + executeWidth(params.executeWidth) +{ + DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth); + _status = Idle; + _issueStatus = Idle; + _exeStatus = Idle; + _wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute. + instQueue.setIssueToExecuteQueue(&issueToExecQueue); + + ldstQueue.setIEW(this); +} + +template <class Impl> +void +SimpleIEW<Impl>::regStats() +{ + instQueue.regStats(); + + iewIdleCycles + .name(name() + ".iewIdleCycles") + .desc("Number of cycles IEW is idle"); + + iewSquashCycles + .name(name() + ".iewSquashCycles") + .desc("Number of cycles IEW is squashing"); + + iewBlockCycles + .name(name() + ".iewBlockCycles") + .desc("Number of cycles IEW is blocking"); + + iewUnblockCycles + .name(name() + ".iewUnblockCycles") + .desc("Number of cycles IEW is unblocking"); + +// iewWBInsts; + + iewDispatchedInsts + .name(name() + ".iewDispatchedInsts") + .desc("Number of instructions dispatched to IQ"); + + iewDispSquashedInsts + .name(name() + ".iewDispSquashedInsts") + .desc("Number of squashed instructions skipped by dispatch"); + + iewDispLoadInsts + .name(name() + ".iewDispLoadInsts") + .desc("Number of dispatched load instructions"); + + iewDispStoreInsts + .name(name() + ".iewDispStoreInsts") + .desc("Number of dispatched store instructions"); + + iewDispNonSpecInsts + .name(name() + ".iewDispNonSpecInsts") + .desc("Number of dispatched non-speculative instructions"); + + iewIQFullEvents + .name(name() + ".iewIQFullEvents") + .desc("Number of times the IQ has become full, causing a stall"); + + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .name(name() + ".iewExecLoadInsts") + .desc("Number of load instructions executed"); + + iewExecStoreInsts + .name(name() + ".iewExecStoreInsts") + .desc("Number of store instructions executed"); + + iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + memOrderViolationEvents + .name(name() + ".memOrderViolationEvents") + .desc("Number of memory order violations"); + + predictedTakenIncorrect + .name(name() + ".predictedTakenIncorrect") + .desc("Number of branches that were predicted taken incorrectly"); +} + +template<class Impl> +void +SimpleIEW<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); + cpu = cpu_ptr; + + instQueue.setCPU(cpu_ptr); + ldstQueue.setCPU(cpu_ptr); +} + +template<class Impl> +void +SimpleIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToIEWDelay); + + // Setup wire to write information back to previous stages. + toRename = timeBuffer->getWire(0); + + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template<class Impl> +void +SimpleIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to read information from rename queue. + fromRename = renameQueue->getWire(-renameToIEWDelay); +} + +template<class Impl> +void +SimpleIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) +{ + DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to write instructions to commit. + toCommit = iewQueue->getWire(0); +} + +template<class Impl> +void +SimpleIEW<Impl>::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template<class Impl> +void +SimpleIEW<Impl>::squash() +{ + DPRINTF(IEW, "IEW: Squashing all instructions.\n"); + _status = Squashing; + + // Tell the IQ to start squashing. + instQueue.squash(); + + // Tell the LDSTQ to start squashing. + ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); +} + +template<class Impl> +void +SimpleIEW<Impl>::squashDueToBranch(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->mispredPC = inst->readPC(); + toCommit->nextPC = inst->readNextPC(); + toCommit->branchMispredict = true; + // Prediction was incorrect, so send back inverse. + toCommit->branchTaken = inst->readNextPC() != + (inst->readPC() + sizeof(TheISA::MachInst)); +} + +template<class Impl> +void +SimpleIEW<Impl>::squashDueToMem(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->nextPC = inst->readNextPC(); +} + +template<class Impl> +void +SimpleIEW<Impl>::block() +{ + DPRINTF(IEW, "IEW: Blocking.\n"); + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromRename); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template<class Impl> +inline void +SimpleIEW<Impl>::unblock() +{ + // Check if there's information in the skid buffer. If there is, then + // set status to unblocking, otherwise set it directly to running. + DPRINTF(IEW, "IEW: Reading instructions out of the skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toRename->iewInfo.stall = true; + } else { + DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); + _status = Running; + } +} + +template<class Impl> +void +SimpleIEW<Impl>::wakeDependents(DynInstPtr &inst) +{ + instQueue.wakeDependents(inst); +} + + +template<class Impl> +void +SimpleIEW<Impl>::instToCommit(DynInstPtr &inst) +{ + +} + +template <class Impl> +void +SimpleIEW<Impl>::dispatchInsts() +{ + //////////////////////////////////////// + // DISPATCH/ISSUE stage + //////////////////////////////////////// + + //Put into its own function? + //Add instructions to IQ if there are any instructions there + + // Check if there are any instructions coming from rename, and we're. + // not squashing. + if (fromRename->size > 0) { + int insts_to_add = fromRename->size; + + // Loop through the instructions, putting them in the instruction + // queue. + for (int inst_num = 0; inst_num < insts_to_add; ++inst_num) + { + DynInstPtr inst = fromRename->insts[inst_num]; + + // Make sure there's a valid instruction there. + assert(inst); + + DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", + inst->readPC()); + + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. + if (inst->isSquashed()) { + ++iewDispSquashedInsts; + continue; + } else if (instQueue.isFull()) { + DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); + // Call function to start blocking. + block(); + // Tell previous stage to stall. + toRename->iewInfo.stall = true; + + ++iewIQFullEvents; + break; + } else if (inst->isLoad()) { + DPRINTF(IEW, "IEW: Issue: Memory instruction " + "encountered, adding to LDSTQ.\n"); + + // Reserve a spot in the load store queue for this + // memory access. + ldstQueue.insertLoad(inst); + + ++iewDispLoadInsts; + } else if (inst->isStore()) { + ldstQueue.insertStore(inst); + + ++iewDispStoreInsts; + } else if (inst->isNonSpeculative()) { + DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " + "encountered, skipping.\n"); + + // Same hack as with stores. + inst->setCanCommit(); + + // Specificall insert it as nonspeculative. + instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + continue; + } else if (inst->isNop()) { + DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " + ", skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } else if (inst->isExecuted()) { + assert(0 && "Instruction shouldn't be executed.\n"); + DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " + "skipping.\n"); + + inst->setIssued(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } + + // If the instruction queue is not full, then add the + // instruction. + instQueue.insert(fromRename->insts[inst_num]); + + ++iewDispatchedInsts; + } + } +} + +template <class Impl> +void +SimpleIEW<Impl>::executeInsts() +{ + //////////////////////////////////////// + //EXECUTE/WRITEBACK stage + //////////////////////////////////////// + + //Put into its own function? + //Similarly should probably have separate execution for int vs FP. + // Above comment is handled by the issue queue only issuing a valid + // mix of int/fp instructions. + //Actually okay to just have one execution, buuuuuut will need + //somewhere that defines the execution latency of all instructions. + // @todo: Move to the FU pool used in the current full cpu. + + int fu_usage = 0; + bool fetch_redirect = false; + int inst_slot = 0; + int time_slot = 0; + + // Execute/writeback any instructions that are available. + for (int inst_num = 0; + fu_usage < executeWidth && /* Haven't exceeded available FU's. */ + inst_num < issueWidth && + fromIssue->insts[inst_num]; + ++inst_num) { + + DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); + + // Get instruction from issue's queue. + DynInstPtr inst = fromIssue->insts[inst_num]; + + DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + toCommit->insts[inst_num] = inst; + + ++iewExecSquashedInsts; + + continue; + } + + inst->setExecuted(); + + // If an instruction is executed, then count it towards FU usage. + ++fu_usage; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + ldstQueue.executeLoad(inst); + + ++iewExecLoadInsts; + } else if (inst->isStore()) { + ldstQueue.executeStore(inst); + + ++iewExecStoreInsts; + } else { + panic("IEW: Unexpected memory type!\n"); + } + + } else { + inst->execute(); + + ++iewExecutedInsts; + } + + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + (*iewQueue)[time_slot].insts[inst_slot]; + while ((*iewQueue)[time_slot].insts[inst_slot]) { + if (inst_slot < issueWidth) { + ++inst_slot; + } else { + ++time_slot; + inst_slot = 0; + } + + assert(time_slot < 5); + } + + // May actually have to work this out, especially with loads and stores + + // Add finished instruction to queue to commit. + (*iewQueue)[time_slot].insts[inst_slot] = inst; + (*iewQueue)[time_slot].size++; + + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. + if (!fetch_redirect) { + if (inst->mispredicted()) { + fetch_redirect = true; + + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { + predictedTakenIncorrect++; + } + } else if (ldstQueue.violation()) { + fetch_redirect = true; + + // Get the DynInst that caused the violation. + DynInstPtr violator = ldstQueue.getMemDepViolator(); + + DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. + instQueue.violation(inst, violator); + + // Squash. + squashDueToMem(inst); + + ++memOrderViolationEvents; + } + } + } +} + +template<class Impl> +void +SimpleIEW<Impl>::tick() +{ + // Considering putting all the state-determining stuff in this section. + + // Try to fill up issue queue with as many instructions as bandwidth + // allows. + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + + // Check if the stage is in a running status. + if (_status != Blocked && _status != Squashing) { + DPRINTF(IEW, "IEW: Status is not blocked, attempting to run " + "stage.\n"); + iew(); + + // If it's currently unblocking, check to see if it should switch + // to running. + if (_status == Unblocking) { + unblock(); + + ++iewUnblockCycles; + } + } else if (_status == Squashing) { + + DPRINTF(IEW, "IEW: Still squashing.\n"); + + // Check if stage should remain squashing. Stop squashing if the + // squash signal clears. + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + DPRINTF(IEW, "IEW: Done squashing, changing status to " + "running.\n"); + + _status = Running; + instQueue.stopSquash(); + } else { + instQueue.doSquash(); + } + + ++iewSquashCycles; + } else if (_status == Blocked) { + // Continue to tell previous stage to stall. + toRename->iewInfo.stall = true; + + // Check if possible stall conditions have cleared. + if (!fromCommit->commitInfo.stall && + !instQueue.isFull()) { + DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n"); + _status = Unblocking; + } + + // If there's still instructions coming from rename, continue to + // put them on the skid buffer. + if (fromRename->size == 0) { + block(); + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + + ++iewBlockCycles; + } + + // @todo: Maybe put these at the beginning, so if it's idle it can + // return early. + // Write back number of free IQ entries here. + toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + + ldstQueue.writebackStores(); + + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + // This is pretty inefficient... + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); + ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); + } + + if (fromCommit->commitInfo.nonSpecSeqNum != 0) { + instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); + } + + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", + instQueue.numFreeEntries()); +} + +template<class Impl> +void +SimpleIEW<Impl>::iew() +{ + // Might want to put all state checks in the tick() function. + // Check if being told to stall from commit. + if (fromCommit->commitInfo.stall) { + block(); + return; + } else if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + // Also check if commit is telling this stage to squash. + squash(); + return; + } + + dispatchInsts(); + + // Have the instruction queue try to schedule any ready instructions. + instQueue.scheduleReadyInsts(); + + executeInsts(); + + // Loop through the head of the time buffer and wake any dependents. + // These instructions are about to write back. In the simple model + // this loop can really happen within the previous loop, but when + // instructions have actual latencies, this loop must be separate. + // Also mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to rename map, or have this as + // part of backwards communication. + for (int inst_num = 0; inst_num < issueWidth && + toCommit->insts[inst_num]; inst_num++) + { + DynInstPtr inst = toCommit->insts[inst_num]; + + DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", + inst->readPC()); + + if(!inst->isSquashed()) { + instQueue.wakeDependents(inst); + + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } + } + } + + // Also should advance its own time buffers if the stage ran. + // Not the best place for it, but this works (hopefully). + issueToExecQueue.advance(); +} + +#if !FULL_SYSTEM +template<class Impl> +void +SimpleIEW<Impl>::lsqWriteback() +{ + ldstQueue.writebackAllInsts(); +} +#endif diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc new file mode 100644 index 000000000..2ff2282b4 --- /dev/null +++ b/src/cpu/o3/inst_queue.cc @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template class InstructionQueue<AlphaSimpleImpl>; + +template<> +unsigned +InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh new file mode 100644 index 000000000..43fe96c49 --- /dev/null +++ b/src/cpu/o3/inst_queue.hh @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_INST_QUEUE_HH__ +#define __CPU_O3_CPU_INST_QUEUE_HH__ + +#include <list> +#include <map> +#include <queue> +#include <vector> + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + +/** + * A standard instruction queue class. It holds ready instructions, in + * order, in seperate priority queues to facilitate the scheduling of + * instructions. The IQ uses a separate linked list to track dependencies. + * Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. + */ +template <class Impl> +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; + + // Typedef of iterator through the list of instructions. Might be + // better to untie this from the FullCPU or pass its information to + // the stages. + typedef typename std::list<DynInstPtr>::iterator ListIt; + + /** + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. + */ + struct pqCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + /** + * Struct for comparing entries to be added to the set. This gives + * standard ordering in terms of sequence numbers. + */ + struct setCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum < rhs->seqNum; + } + }; + + typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare> + ReadyInstQueue; + + InstructionQueue(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu); + + void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + unsigned numFreeEntries(); + + bool isFull(); + + void insert(DynInstPtr &new_inst); + + void insertNonSpec(DynInstPtr &new_inst); + + void advanceTail(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &inst); + + void wakeDependents(DynInstPtr &completed_inst); + + void violation(DynInstPtr &store, DynInstPtr &faulting_load); + + // Change this to take in the sequence number + void squash(); + + void doSquash(); + + void stopSquash(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit; + + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer<IssueStruct> *issueToExecuteQueue; + + /** The backwards time buffer. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + enum InstList { + Int, + Float, + Branch, + Memory, + Misc, + Squashed, + None + }; + + /** List of ready int instructions. Used to keep track of the order in + * which instructions should issue. + */ + ReadyInstQueue readyIntInsts; + + /** List of ready floating point instructions. */ + ReadyInstQueue readyFloatInsts; + + /** List of ready branch instructions. */ + ReadyInstQueue readyBranchInsts; + + /** List of ready miscellaneous instructions. */ + ReadyInstQueue readyMiscInsts; + + /** List of squashed instructions (which are still valid and in IQ). + * Implemented using a priority queue; the entries must contain both + * the IQ index and sequence number of each instruction so that + * ordering based on sequence numbers can be used. + */ + ReadyInstQueue squashedInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + */ + std::map<InstSeqNum, DynInstPtr> nonSpecInsts; + + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The number of integer instructions that can be issued in one + * cycle. + */ + unsigned intWidth; + + /** The number of floating point instructions that can be issued + * in one cycle. + */ + unsigned floatWidth; + + /** The number of branches that can be issued in one cycle. */ + unsigned branchWidth; + + /** The number of memory instructions that can be issued in one cycle. */ + unsigned memoryWidth; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + + //The number of physical registers in the CPU. + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Iterator that points to the youngest instruction in the IQ. */ + ListIt tail; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt; + + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. So for now it will sit here, within the IQ, until + //a better implementation is decided upon. + // This function probably shouldn't be within the entry... + void insert(DynInstPtr &new_inst); + + void remove(DynInstPtr &inst_to_remove); + + // Debug variable, remove when done testing. + static unsigned mem_alloc_counter; + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector<bool> regScoreboard; + + bool addToDependents(DynInstPtr &new_inst); + void insertDependency(DynInstPtr &new_inst); + void createDependency(DynInstPtr &new_inst); + + void addIfReady(DynInstPtr &inst); + + private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + /** Debugging function to dump out the dependency graph. + */ + void dumpDependGraph(); + + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + + Stats::Scalar<> iqInstsAdded; + Stats::Scalar<> iqNonSpecInstsAdded; +// Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqIntInstsIssued; +// Stats::Scalar<> iqFloatInstsAdded; + Stats::Scalar<> iqFloatInstsIssued; +// Stats::Scalar<> iqBranchInstsAdded; + Stats::Scalar<> iqBranchInstsIssued; +// Stats::Scalar<> iqMemInstsAdded; + Stats::Scalar<> iqMemInstsIssued; +// Stats::Scalar<> iqMiscInstsAdded; + Stats::Scalar<> iqMiscInstsIssued; + Stats::Scalar<> iqSquashedInstsIssued; + Stats::Scalar<> iqLoopSquashStalls; + Stats::Scalar<> iqSquashedInstsExamined; + Stats::Scalar<> iqSquashedOperandsExamined; + Stats::Scalar<> iqSquashedNonSpecRemoved; + +}; + +#endif //__CPU_O3_CPU_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh new file mode 100644 index 000000000..048dc7c00 --- /dev/null +++ b/src/cpu/o3/inst_queue_impl.hh @@ -0,0 +1,1136 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleRreadyInsts(), gets put onto the +// normal ready queue. This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards. + +#include <limits> +#include <vector> + +#include "sim/root.hh" + +#include "cpu/o3/inst_queue.hh" + +// Either compile error or max int due to sign extension. +// Hack to avoid compile warnings. +const InstSeqNum MaxInstSeqNum = std::numeric_limits<InstSeqNum>::max(); + +template <class Impl> +InstructionQueue<Impl>::InstructionQueue(Params ¶ms) + : memDepUnit(params), + numEntries(params.numIQEntries), + intWidth(params.executeIntWidth), + floatWidth(params.executeFloatWidth), + branchWidth(params.executeBranchWidth), + memoryWidth(params.executeMemoryWidth), + totalWidth(params.issueWidth), + numPhysIntRegs(params.numPhysIntRegs), + numPhysFloatRegs(params.numPhysFloatRegs), + commitToIEWDelay(params.commitToIEWDelay) +{ + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + + DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. + dependGraph = new DependencyEntry[numPhysRegs]; + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + + // Initialize all the head pointers to point to NULL, and all the + // entries as unready. + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. + for (int i = 0; i < numPhysRegs; ++i) + { + dependGraph[i].next = NULL; + dependGraph[i].inst = NULL; + regScoreboard[i] = false; + } + +} + +template <class Impl> +void +InstructionQueue<Impl>::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqLoopSquashStalls + .name(name() + ".iqLoopSquashStalls") + .desc("Number of times issue loop had to restart due to squashed " + "inst; mainly for profiling") + .prereq(iqLoopSquashStalls); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); + + // Tell mem dependence unit to reg stats as well. + memDepUnit.regStats(); +} + +template <class Impl> +void +InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); +} + +template <class Impl> +void +InstructionQueue<Impl>::setIssueToExecuteQueue( + TimeBuffer<IssueStruct> *i2e_ptr) +{ + DPRINTF(IQ, "IQ: Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} + +template <class Impl> +void +InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(IQ, "IQ: Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +template <class Impl> +unsigned +InstructionQueue<Impl>::numFreeEntries() +{ + return freeEntries; +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template <class Impl> +bool +InstructionQueue<Impl>::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template <class Impl> +void +InstructionQueue<Impl>::insert(DynInstPtr &new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != new_inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (new_inst->isMemRef()) { + memDepUnit.insert(new_inst); + // Uh..forgot to look it up and put it on the proper dependency list + // if the instruction should not go yet. + } else { + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); + } + + ++iqInstsAdded; + + assert(freeEntries == (numEntries - countInsts())); +} + +template <class Impl> +void +InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst) +{ + nonSpecInsts[inst->seqNum] = inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + // Decrease the number of free entries. + --freeEntries; + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (inst->isMemRef()) { + memDepUnit.insertNonSpec(inst); + } + + ++iqNonSpecInstsAdded; +} + +// Slightly hack function to advance the tail iterator in the case that +// the IEW stage issues an instruction that is not added to the IQ. This +// is needed in case a long chain of such instructions occurs. +// I don't think this is used anymore. +template <class Impl> +void +InstructionQueue<Impl>::advanceTail(DynInstPtr &inst) +{ + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + assert(freeEntries <= numEntries); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +// Need to make sure the number of float and integer instructions +// issued does not exceed the total issue bandwidth. +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template <class Impl> +void +InstructionQueue<Impl>::scheduleReadyInsts() +{ + DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from " + "the IQ.\n"); + + int int_issued = 0; + int float_issued = 0; + int branch_issued = 0; + int memory_issued = 0; + int squashed_issued = 0; + int total_issued = 0; + + IssueStruct *i2e_info = issueToExecuteQueue->access(0); + + bool insts_available = !readyBranchInsts.empty() || + !readyIntInsts.empty() || + !readyFloatInsts.empty() || + !memDepUnit.empty() || + !readyMiscInsts.empty() || + !squashedInsts.empty(); + + // Note: Requires a globally defined constant. + InstSeqNum oldest_inst = MaxInstSeqNum; + InstList list_with_oldest = None; + + // Temporary values. + DynInstPtr int_head_inst; + DynInstPtr float_head_inst; + DynInstPtr branch_head_inst; + DynInstPtr mem_head_inst; + DynInstPtr misc_head_inst; + DynInstPtr squashed_head_inst; + + // Somewhat nasty code to look at all of the lists where issuable + // instructions are located, and choose the oldest instruction among + // those lists. Consider a rewrite in the future. + while (insts_available && total_issued < totalWidth) + { + // Set this to false. Each if-block is required to set it to true + // if there were instructions available this check. This will cause + // this loop to run once more than necessary, but avoids extra calls. + insts_available = false; + + oldest_inst = MaxInstSeqNum; + + list_with_oldest = None; + + if (!readyIntInsts.empty() && + int_issued < intWidth) { + + insts_available = true; + + int_head_inst = readyIntInsts.top(); + + if (int_head_inst->isSquashed()) { + readyIntInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } + + oldest_inst = int_head_inst->seqNum; + + list_with_oldest = Int; + } + + if (!readyFloatInsts.empty() && + float_issued < floatWidth) { + + insts_available = true; + + float_head_inst = readyFloatInsts.top(); + + if (float_head_inst->isSquashed()) { + readyFloatInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (float_head_inst->seqNum < oldest_inst) { + oldest_inst = float_head_inst->seqNum; + + list_with_oldest = Float; + } + } + + if (!readyBranchInsts.empty() && + branch_issued < branchWidth) { + + insts_available = true; + + branch_head_inst = readyBranchInsts.top(); + + if (branch_head_inst->isSquashed()) { + readyBranchInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (branch_head_inst->seqNum < oldest_inst) { + oldest_inst = branch_head_inst->seqNum; + + list_with_oldest = Branch; + } + + } + + if (!memDepUnit.empty() && + memory_issued < memoryWidth) { + + insts_available = true; + + mem_head_inst = memDepUnit.top(); + + if (mem_head_inst->isSquashed()) { + memDepUnit.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (mem_head_inst->seqNum < oldest_inst) { + oldest_inst = mem_head_inst->seqNum; + + list_with_oldest = Memory; + } + } + + if (!readyMiscInsts.empty()) { + + insts_available = true; + + misc_head_inst = readyMiscInsts.top(); + + if (misc_head_inst->isSquashed()) { + readyMiscInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (misc_head_inst->seqNum < oldest_inst) { + oldest_inst = misc_head_inst->seqNum; + + list_with_oldest = Misc; + } + } + + if (!squashedInsts.empty()) { + + insts_available = true; + + squashed_head_inst = squashedInsts.top(); + + if (squashed_head_inst->seqNum < oldest_inst) { + list_with_oldest = Squashed; + } + + } + + DynInstPtr issuing_inst = NULL; + + switch (list_with_oldest) { + case None: + DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " + "inst is %#x.\n", issuing_inst); + break; + + case Int: + issuing_inst = int_head_inst; + readyIntInsts.pop(); + ++int_issued; + DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Float: + issuing_inst = float_head_inst; + readyFloatInsts.pop(); + ++float_issued; + DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Branch: + issuing_inst = branch_head_inst; + readyBranchInsts.pop(); + ++branch_issued; + DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Memory: + issuing_inst = mem_head_inst; + + memDepUnit.pop(); + ++memory_issued; + DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Misc: + issuing_inst = misc_head_inst; + readyMiscInsts.pop(); + + ++iqMiscInstsIssued; + + DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Squashed: + assert(0 && "Squashed insts should not issue any more!"); + squashedInsts.pop(); + // Set the squashed instruction as able to commit so that commit + // can just drop it from the ROB. This is a bit faked. + ++squashed_issued; + ++freeEntries; + + DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", + squashed_head_inst->readPC()); + break; + } + + if (list_with_oldest != None && list_with_oldest != Squashed) { + i2e_info->insts[total_issued] = issuing_inst; + i2e_info->size++; + + issuing_inst->setIssued(); + + ++freeEntries; + ++total_issued; + } + + assert(freeEntries == (numEntries - countInsts())); + } + + iqIntInstsIssued += int_issued; + iqFloatInstsIssued += float_issued; + iqBranchInstsIssued += branch_issued; + iqMemInstsIssued += memory_issued; + iqSquashedInstsIssued += squashed_issued; +} + +template <class Impl> +void +InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + + non_spec_it_t inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); + + // Now schedule the instruction. + if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); + } else { + memDepUnit.nonSpecInstReady((*inst_it).second); + } + + nonSpecInsts.erase(inst_it); +} + +template <class Impl> +void +InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); + //Look at the physical destination register of the DynInst + //and look it up on the dependency graph. Then mark as ready + //any instructions within the instruction queue. + DependencyEntry *curr; + + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. + + if (completed_inst->isMemRef()) { + memDepUnit.wakeDependents(completed_inst); + } + + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + while (dependGraph[dest_reg].next) { + + curr = dependGraph[dest_reg].next; + + DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + dependGraph[dest_reg].next = curr->next; + + DependencyEntry::mem_alloc_counter--; + + curr->inst = NULL; + + delete curr; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +} + +template <class Impl> +void +InstructionQueue<Impl>::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit.violation(store, faulting_load); +} + +template <class Impl> +void +InstructionQueue<Impl>::squash() +{ + DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n"); + + // Read instruction sequence number of last instruction out of the + // time buffer. + squashedSeqNum = fromCommit->commitInfo.doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt = tail; + + // Call doSquash if there are insts in the IQ + if (freeEntries != numEntries) { + doSquash(); + } + + // Also tell the memory dependence unit to squash. + memDepUnit.squash(squashedSeqNum); +} + +template <class Impl> +void +InstructionQueue<Impl>::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInstPtr squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + + // Remove the instruction from the dependency list. + // Hack for now: These below don't add themselves to the + // dependency list, so don't try to remove them. + if (!squashed_inst->isNonSpeculative()/* && + !squashed_inst->isStore()*/ + ) { + + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + + ++iqSquashedOperandsExamined; + } + + // Might want to remove producers as well. + } else { + nonSpecInsts[squashed_inst->seqNum] = NULL; + + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + +// squashedInsts.push(squashed_inst); + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); + + ++freeEntries; + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + + --squashIt; + ++iqSquashedInstsExamined; + } + + assert(freeEntries <= numEntries); + + if (freeEntries == numEntries) { + tail = cpu->instList.end(); + } + +} + +template <class Impl> +void +InstructionQueue<Impl>::stopSquash() +{ + // Clear up the squash variables to ensure that squashing doesn't + // get called improperly. + squashedSeqNum = 0; + + squashIt = cpu->instList.end(); +} + +template <class Impl> +void +InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; + + ++mem_alloc_counter; +} + +template <class Impl> +void +InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while(curr->inst != inst_to_remove) + { + prev = curr; + curr = curr->next; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --mem_alloc_counter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template <class Impl> +bool +InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template <class Impl> +void +InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + + dependGraph[dest_reg].inst = new_inst; + + if (dependGraph[dest_reg].next) { + dumpDependGraph(); + panic("IQ: Dependency graph not empty!"); + } + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} + +template <class Impl> +void +InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isControl()) { + + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyBranchInsts.push(inst); + + } else if (inst->isMemRef()) { + + DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. + + memDepUnit.regsReady(inst); + +#if 0 + if (memDepUnit.readyToIssue(inst)) { + DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyMemInsts.push(inst); + } else { + // Make dependent on the store. + // Will need some way to get the store instruction it should + // be dependent upon; then when the store issues it can + // put the instruction on the ready list. + // Yet another tree? + assert(0 && "Instruction has no way to actually issue"); + } +#endif + + } else if (inst->isInteger()) { + + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(inst); + + } else if (inst->isFloating()) { + + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(inst); + + } else { + DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, " + "putting it onto the ready list, PC %#x..\n", + inst->readPC()); + + readyMiscInsts.push(inst); + } + } +} + +/* + * Caution, this function must not be called prior to tail being updated at + * least once, otherwise it will fail the assertion. This is because + * instList.begin() actually changes upon the insertion of an element into the + * list when the list is empty. + */ +template <class Impl> +int +InstructionQueue<Impl>::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + if (tail == cpu->instList.end()) + return 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + ++count_it; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. + if (count_it != cpu->instList.end() && + (*count_it) && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template <class Impl> +void +InstructionQueue<Impl>::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x consumer: ", i, + curr->inst->readPC()); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x ", curr->inst->readPC()); + } + + cprintf("\n"); + } +} + +template <class Impl> +void +InstructionQueue<Impl>::dumpLists() +{ + cprintf("Ready integer list size: %i\n", readyIntInsts.size()); + + cprintf("Ready float list size: %i\n", readyFloatInsts.size()); + + cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); + + cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); + + cprintf("Squashed list size: %i\n", squashedInsts.size()); + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + non_spec_it_t non_spec_it = nonSpecInsts.begin(); + + cprintf("Non speculative list: "); + + while (non_spec_it != nonSpecInsts.end()) { + cprintf("%#x ", (*non_spec_it).second->readPC()); + ++non_spec_it; + } + + cprintf("\n"); + +} diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc new file mode 100644 index 000000000..9c1e7f9d8 --- /dev/null +++ b/src/cpu/o3/mem_dep_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/store_set.hh" +#include "cpu/o3/mem_dep_unit_impl.hh" + +// Force instantation of memory dependency unit using store sets and +// AlphaSimpleImpl. +template class MemDepUnit<StoreSet, AlphaSimpleImpl>; diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh new file mode 100644 index 000000000..ca63577a1 --- /dev/null +++ b/src/cpu/o3/mem_dep_unit.hh @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_MEM_DEP_UNIT_HH__ +#define __CPU_O3_CPU_MEM_DEP_UNIT_HH__ + +#include <map> +#include <set> + +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + +/** + * Memory dependency unit class. This holds the memory dependence predictor. + * As memory operations are issued to the IQ, they are also issued to this + * unit, which then looks up the prediction as to what they are dependent + * upon. This unit must be checked prior to a memory operation being able + * to issue. Although this is templated, it's somewhat hard to make a generic + * memory dependence unit. This one is mostly for store sets; it will be + * quite limited in what other memory dependence predictions it can also + * utilize. Thus this class should be most likely be rewritten for other + * dependence prediction schemes. + */ +template <class MemDepPred, class Impl> +class MemDepUnit { + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + MemDepUnit(Params ¶ms); + + void regStats(); + + void insert(DynInstPtr &inst); + + void insertNonSpec(DynInstPtr &inst); + + // Will want to make this operation relatively fast. Right now it + // is somewhat slow. + DynInstPtr &top(); + + void pop(); + + void regsReady(DynInstPtr &inst); + + void nonSpecInstReady(DynInstPtr &inst); + + void issue(DynInstPtr &inst); + + void wakeDependents(DynInstPtr &inst); + + void squash(const InstSeqNum &squashed_num); + + void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + + inline bool empty() + { return readyInsts.empty(); } + + private: + typedef typename std::set<InstSeqNum>::iterator sn_it_t; + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator dyn_it_t; + + // Forward declarations so that the following two typedefs work. + class Dependency; + class ltDependency; + + typedef typename std::set<Dependency, ltDependency>::iterator dep_it_t; + typedef typename std::map<InstSeqNum, vector<dep_it_t> >::iterator + sd_it_t; + + struct Dependency { + Dependency(const InstSeqNum &_seqNum) + : seqNum(_seqNum), regsReady(0), memDepReady(0) + { } + + Dependency(const InstSeqNum &_seqNum, bool _regsReady, + bool _memDepReady) + : seqNum(_seqNum), regsReady(_regsReady), + memDepReady(_memDepReady) + { } + + InstSeqNum seqNum; + mutable bool regsReady; + mutable bool memDepReady; + mutable sd_it_t storeDep; + }; + + struct ltDependency { + bool operator() (const Dependency &lhs, const Dependency &rhs) + { + return lhs.seqNum < rhs.seqNum; + } + }; + + inline void moveToReady(dep_it_t &woken_inst); + + /** List of instructions that have passed through rename, yet are still + * waiting on either a memory dependence to resolve or source registers to + * become available before they can issue. + */ + std::set<Dependency, ltDependency> waitingInsts; + + /** List of instructions that have all their predicted memory dependences + * resolved and their source registers ready. + */ + std::set<InstSeqNum> readyInsts; + + // Change this to hold a vector of iterators, which will point to the + // entry of the waiting instructions. + /** List of stores' sequence numbers, each of which has a vector of + * iterators. The iterators point to the appropriate node within + * waitingInsts that has the depenendent instruction. + */ + std::map<InstSeqNum, vector<dep_it_t> > storeDependents; + + // For now will implement this as a map...hash table might not be too + // bad, or could move to something that mimics the current dependency + // graph. + std::map<InstSeqNum, DynInstPtr> memInsts; + + // Iterator pointer to the top instruction which has is ready. + // Is set by the top() call. + dyn_it_t topInst; + + /** The memory dependence predictor. It is accessed upon new + * instructions being added to the IQ, and responds by telling + * this unit what instruction the newly added instruction is dependent + * upon. + */ + MemDepPred depPred; + + Stats::Scalar<> insertedLoads; + Stats::Scalar<> insertedStores; + Stats::Scalar<> conflictingLoads; + Stats::Scalar<> conflictingStores; +}; + +#endif // __CPU_O3_CPU_MEM_DEP_UNIT_HH__ diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh new file mode 100644 index 000000000..296db4c4e --- /dev/null +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <map> + +#include "cpu/o3/mem_dep_unit.hh" + +template <class MemDepPred, class Impl> +MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params ¶ms) + : depPred(params.SSITSize, params.LFSTSize) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::regStats() +{ + insertedLoads + .name(name() + ".memDep.insertedLoads") + .desc("Number of loads inserted to the mem dependence unit."); + + insertedStores + .name(name() + ".memDep.insertedStores") + .desc("Number of stores inserted to the mem dependence unit."); + + conflictingLoads + .name(name() + ".memDep.conflictingLoads") + .desc("Number of conflicting loads."); + + conflictingStores + .name(name() + ".memDep.conflictingStores") + .desc("Number of conflicting stores."); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency unresolved_dependencies(inst_seq_num); + + InstSeqNum producing_store = depPred.checkInst(inst->readPC()); + + if (producing_store == 0 || + storeDependents.find(producing_store) == storeDependents.end()) { + + DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC " + "%#x.\n", inst->readPC()); + + unresolved_dependencies.storeDep = storeDependents.end(); + + if (inst->readyToIssue()) { + readyInsts.insert(inst_seq_num); + } else { + unresolved_dependencies.memDepReady = true; + + waitingInsts.insert(unresolved_dependencies); + } + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Adding to dependency list; " + "inst PC %#x is dependent on seq num %i.\n", + inst->readPC(), producing_store); + + if (inst->readyToIssue()) { + unresolved_dependencies.regsReady = true; + } + + // Find the store that this instruction is dependent on. + sd_it_t store_loc = storeDependents.find(producing_store); + + assert(store_loc != storeDependents.end()); + + // Record the location of the store that this instruction is + // dependent on. + unresolved_dependencies.storeDep = store_loc; + + // If it's not already ready, then add it to the renamed + // list and the dependencies. + dep_it_t inst_loc = + (waitingInsts.insert(unresolved_dependencies)).first; + + // Add this instruction to the list of dependents. + (*store_loc).second.push_back(inst_loc); + + assert(!(*store_loc).second.empty()); + + if (inst->isLoad()) { + ++conflictingLoads; + } else { + ++conflictingStores; + } + } + + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency non_spec_inst(inst_seq_num); + + non_spec_inst.storeDep = storeDependents.end(); + + waitingInsts.insert(non_spec_inst); + + // Might want to turn this part into an inline function or something. + // It's shared between both insert functions. + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template <class MemDepPred, class Impl> +typename Impl::DynInstPtr & +MemDepUnit<MemDepPred, Impl>::top() +{ + topInst = memInsts.find( (*readyInsts.begin()) ); + + DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n", + (*topInst).second->readPC()); + + return (*topInst).second; +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::pop() +{ + DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n", + (*topInst).second->readPC()); + + wakeDependents((*topInst).second); + + issue((*topInst).second); + + memInsts.erase(topInst); + + topInst = memInsts.end(); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for " + "instruction PC %#x.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + if ((*waiting_inst).memDepReady) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory " + "dependencies resolved, adding it to the ready list.\n"); + + moveToReady(waiting_inst); + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on " + "memory dependency.\n"); + + (*waiting_inst).regsReady = true; + } +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative " + "instruction PC %#x as ready.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + moveToReady(waiting_inst); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst) +{ + assert(readyInsts.find(inst->seqNum) != readyInsts.end()); + + DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n", + inst->readPC()); + + // Remove the instruction from the ready list. + readyInsts.erase(inst->seqNum); + + depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst) +{ + // Only stores have dependents. + if (!inst->isStore()) { + return; + } + + // Wake any dependencies. + sd_it_t sd_it = storeDependents.find(inst->seqNum); + + // If there's no entry, then return. Really there should only be + // no entry if the instruction is a load. + if (sd_it == storeDependents.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence " + "number %i has no dependents.\n", + inst->readPC(), inst->seqNum); + + return; + } + + for (int i = 0; i < (*sd_it).second.size(); ++i ) { + dep_it_t woken_inst = (*sd_it).second[i]; + + DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, " + "sequence number %i.\n", + (*woken_inst).seqNum); +#if 0 + // Should we have reached instructions that are actually squashed, + // there will be no more useful instructions in this dependency + // list. Break out early. + if (waitingInsts.find(woken_inst) == waitingInsts.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " + "are squashed, starting at SN %i. Breaking early.\n", + inst->readPC(), woken_inst); + break; + } +#endif + + if ((*woken_inst).regsReady) { + moveToReady(woken_inst); + } else { + (*woken_inst).memDepReady = true; + } + } + + storeDependents.erase(sd_it); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num) +{ + + if (!waitingInsts.empty()) { + dep_it_t waiting_it = waitingInsts.end(); + + --waiting_it; + + // Remove entries from the renamed list as long as we haven't reached + // the end and the entries continue to be younger than the squashed. + while (!waitingInsts.empty() && + (*waiting_it).seqNum > squashed_num) + { + if (!(*waiting_it).memDepReady && + (*waiting_it).storeDep != storeDependents.end()) { + sd_it_t sd_it = (*waiting_it).storeDep; + + // Make sure the iterator that the store has pointing + // back is actually to this instruction. + assert((*sd_it).second.back() == waiting_it); + + // Now remove this from the store's list of dependent + // instructions. + (*sd_it).second.pop_back(); + } + + waitingInsts.erase(waiting_it--); + } + } + + if (!readyInsts.empty()) { + sn_it_t ready_it = readyInsts.end(); + + --ready_it; + + // Same for the ready list. + while (!readyInsts.empty() && + (*ready_it) > squashed_num) + { + readyInsts.erase(ready_it--); + } + } + + if (!storeDependents.empty()) { + sd_it_t dep_it = storeDependents.end(); + + --dep_it; + + // Same for the dependencies list. + while (!storeDependents.empty() && + (*dep_it).first > squashed_num) + { + // This store's list of dependent instructions should be empty. + assert((*dep_it).second.empty()); + + storeDependents.erase(dep_it--); + } + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->readPC(), + store_inst->readPC()); + // Tell the memory dependence unit of the violation. + depPred.violation(violating_load->readPC(), store_inst->readPC()); +} + +template <class MemDepPred, class Impl> +inline void +MemDepUnit<MemDepPred, Impl>::moveToReady(dep_it_t &woken_inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i " + "to the ready list.\n", (*woken_inst).seqNum); + + // Add it to the ready list. + readyInsts.insert((*woken_inst).seqNum); + + // Remove it from the waiting instructions. + waitingInsts.erase(woken_inst); +} diff --git a/src/cpu/o3/ras.cc b/src/cpu/o3/ras.cc new file mode 100644 index 000000000..0a7d6ca63 --- /dev/null +++ b/src/cpu/o3/ras.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/ras.hh" + +ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) + : numEntries(_numEntries), usedEntries(0), + tos(0) +{ + addrStack = new Addr[numEntries]; + + for (int i = 0; i < numEntries; ++i) + addrStack[i] = 0; +} + +void +ReturnAddrStack::push(const Addr &return_addr) +{ + incrTos(); + + addrStack[tos] = return_addr; + + if (usedEntries != numEntries) { + ++usedEntries; + } +} + +void +ReturnAddrStack::pop() +{ + // Not sure it's possible to really track usedEntries properly. +// assert(usedEntries > 0); + + if (usedEntries > 0) { + --usedEntries; + } + + decrTos(); +} + +void +ReturnAddrStack::restore(unsigned top_entry_idx, + const Addr &restored_target) +{ + tos = top_entry_idx; + + addrStack[tos] = restored_target; +} diff --git a/src/cpu/o3/ras.hh b/src/cpu/o3/ras.hh new file mode 100644 index 000000000..46d98181e --- /dev/null +++ b/src/cpu/o3/ras.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_RAS_HH__ +#define __CPU_O3_CPU_RAS_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" + +class ReturnAddrStack +{ + public: + ReturnAddrStack(unsigned numEntries); + + Addr top() + { return addrStack[tos]; } + + unsigned topIdx() + { return tos; } + + void push(const Addr &return_addr); + + void pop(); + + void restore(unsigned top_entry_idx, const Addr &restored_target); + + private: + inline void incrTos() + { if (++tos == numEntries) tos = 0; } + + inline void decrTos() + { tos = (tos == 0 ? numEntries - 1 : tos - 1); } + + Addr *addrStack; + + unsigned numEntries; + + unsigned usedEntries; + + unsigned tos; +}; + +#endif // __CPU_O3_CPU_RAS_HH__ diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh new file mode 100644 index 000000000..a5cfa8f3c --- /dev/null +++ b/src/cpu/o3/regfile.hh @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_REGFILE_HH__ +#define __CPU_O3_CPU_REGFILE_HH__ + +// @todo: Destructor + +#include "arch/isa_traits.hh" +#include "arch/faults.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/o3/comm.hh" + +#if FULL_SYSTEM +#include "kern/kernel_stats.hh" + +#endif + +// This really only depends on the ISA, and not the Impl. It might be nicer +// to see if I can make it depend on nothing... +// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, +// and should go in the AlphaFullCPU. + +template <class Impl> +class PhysRegFile +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + + //Note that most of the definitions of the IntReg, FloatReg, etc. exist + //within the Impl/ISA class and not within this PhysRegFile class. + + //Will need some way to allow stuff like swap_palshadow to access the + //correct registers. Might require code changes to swap_palshadow and + //other execution contexts. + + //Will make these registers public for now, but they probably should + //be private eventually with some accessor functions. + public: + typedef typename Impl::FullCPU FullCPU; + + PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs); + + //Everything below should be pretty well identical to the normal + //register file that exists within AlphaISA class. + //The duplication is unfortunate but it's better than having + //different ways to access certain registers. + + //Add these in later when everything else is in place +// void serialize(std::ostream &os); +// void unserialize(Checkpoint *cp, const std::string §ion); + + uint64_t readIntReg(PhysRegIndex reg_idx) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to int register %i, has data " + "%i\n", int(reg_idx), intRegFile[reg_idx]); + return intRegFile[reg_idx]; + } + + FloatReg readFloatReg(PhysRegIndex reg_idx, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatReg floatReg = floatRegFile.readReg(reg_idx, width); + + DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has " + "data %8.8d\n", int(reg_idx), (double)floatReg); + + return floatReg; + } + + FloatReg readFloatReg(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatReg floatReg = floatRegFile.readReg(reg_idx); + + DPRINTF(IEW, "RegFile: Access to float register %i, has " + "data %8.8d\n", int(reg_idx), (double)floatReg); + + return floatReg; + } + + FloatRegBits readFloatRegBits(PhysRegIndex reg_idx, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width); + + DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, " + "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); + + return floatRegBits; + } + + FloatRegBits readFloatRegBits(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx); + + DPRINTF(IEW, "RegFile: Access to float register %i as int, " + "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); + + return floatRegBits; + } + + void setIntReg(PhysRegIndex reg_idx, uint64_t val) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", + int(reg_idx), val); + + intRegFile[reg_idx] = val; + } + + void setFloatReg(PhysRegIndex reg_idx, FloatReg val, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n", + int(reg_idx), (double)val); + + floatRegFile.setReg(reg_idx, val, width); + } + + void setFloatReg(PhysRegIndex reg_idx, FloatReg val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n", + int(reg_idx), (double)val); + + floatRegFile.setReg(reg_idx, val); + } + + void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), (uint64_t)val); + + floatRegFile.setRegBits(reg_idx, val, width); + } + + void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), (uint64_t)val); + + floatRegFile.setRegBits(reg_idx, val); + } + + uint64_t readPC() + { + return pc; + } + + void setPC(uint64_t val) + { + pc = val; + } + + void setNextPC(uint64_t val) + { + npc = val; + } + + //Consider leaving this stuff and below in some implementation specific + //file as opposed to the general register file. Or have a derived class. + MiscReg readMiscReg(int misc_reg) + { + // Dummy function for now. + // @todo: Fix this once proxy XC is used. + return 0; + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once proxy XC is used. + return NoFault; + } + +#if FULL_SYSTEM + int readIntrFlag() { return intrflag; } + void setIntrFlag(int val) { intrflag = val; } +#endif + + // These should be private eventually, but will be public for now + // so that I can hack around the initregs issue. + public: + /** (signed) integer register file. */ + IntReg *intRegFile; + + /** Floating point register file. */ + FloatReg *floatRegFile; + + /** Miscellaneous register file. */ + MiscRegFile miscRegs; + + /** Program counter. */ + Addr pc; + + /** Next-cycle program counter. */ + Addr npc; + +#if FULL_SYSTEM + private: + // This is ISA specifc stuff; remove it eventually once ISAImpl is used +// IntReg palregs[NumIntRegs]; // PAL shadow registers + int intrflag; // interrupt flag + bool pal_shadow; // using pal_shadow registers +#endif + + private: + FullCPU *cpu; + + public: + void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + + unsigned numPhysicalIntRegs; + unsigned numPhysicalFloatRegs; +}; + +template <class Impl> +PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs) + : numPhysicalIntRegs(_numPhysicalIntRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs) +{ + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; + + memset(intRegFile, 0, sizeof(*intRegFile)); + memset(floatRegFile, 0, sizeof(*floatRegFile)); +} + +#endif // __CPU_O3_CPU_REGFILE_HH__ diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc new file mode 100644 index 000000000..6e9ee23da --- /dev/null +++ b/src/cpu/o3/rename.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/rename_impl.hh" + +template class SimpleRename<AlphaSimpleImpl>; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh new file mode 100644 index 000000000..07b442964 --- /dev/null +++ b/src/cpu/o3/rename.hh @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: +// Fix up trap and barrier handling. +// May want to have different statuses to differentiate the different stall +// conditions. + +#ifndef __CPU_O3_CPU_SIMPLE_RENAME_HH__ +#define __CPU_O3_CPU_SIMPLE_RENAME_HH__ + +#include <list> + +#include "base/statistics.hh" +#include "base/timebuf.hh" + +// Will need rename maps for both the int reg file and fp reg file. +// Or change rename map class to handle both. (RegFile handles both.) +template<class Impl> +class SimpleRename +{ + public: + // Typedefs from the Impl. + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + // Typedefs from the CPUPol + typedef typename CPUPol::FreeList FreeList; + typedef typename CPUPol::RenameMap RenameMap; + + // Typedefs from the ISA. + typedef TheISA::RegIndex RegIndex; + + public: + // Rename will block if ROB becomes full or issue queue becomes full, + // or there are no free registers to rename to. + // Only case where rename squashes is if IEW squashes. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking, + BarrierStall + }; + + private: + Status _status; + + public: + SimpleRename(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void setFreeList(FreeList *fl_ptr); + + void dumpHistory(); + + void tick(); + + void rename(); + + void squash(); + + private: + void block(); + + inline void unblock(); + + void doSquash(); + + void removeFromHistory(InstSeqNum inst_seq_num); + + inline void renameSrcRegs(DynInstPtr &inst); + + inline void renameDestRegs(DynInstPtr &inst); + + inline int calcFreeROBEntries(); + + inline int calcFreeIQEntries(); + + /** Holds the previous information for each rename. + * Note that often times the inst may have been deleted, so only access + * the pointer for the address and do not dereference it. + */ + struct RenameHistory { + RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg, + PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg) + : instSeqNum(_instSeqNum), archReg(_archReg), + newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg), + placeHolder(false) + { + } + + /** Constructor used specifically for cases where a place holder + * rename history entry is being made. + */ + RenameHistory(InstSeqNum _instSeqNum) + : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0), + prevPhysReg(0), placeHolder(true) + { + } + + InstSeqNum instSeqNum; + RegIndex archReg; + PhysRegIndex newPhysReg; + PhysRegIndex prevPhysReg; + bool placeHolder; + }; + + std::list<RenameHistory> historyBuffer; + + /** CPU interface. */ + FullCPU *cpu; + + // Interfaces to objects outside of rename. + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get IEW's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write infromation heading to previous stages. */ + // Might not be the best name as not only decode will read it. + typename TimeBuffer<TimeStruct>::wire toDecode; + + /** Rename instruction queue. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to write any information heading to IEW. */ + typename TimeBuffer<RenameStruct>::wire toIEW; + + /** Decode instruction queue interface. */ + TimeBuffer<DecodeStruct> *decodeQueue; + + /** Wire to get decode's output from decode queue. */ + typename TimeBuffer<DecodeStruct>::wire fromDecode; + + /** Skid buffer between rename and decode. */ + std::queue<DecodeStruct> skidBuffer; + + /** Rename map interface. */ + SimpleRenameMap *renameMap; + + /** Free list interface. */ + FreeList *freeList; + + /** Delay between iew and rename, in ticks. */ + int iewToRenameDelay; + + /** Delay between decode and rename, in ticks. */ + int decodeToRenameDelay; + + /** Delay between commit and rename, in ticks. */ + unsigned commitToRenameDelay; + + /** Rename width, in instructions. */ + unsigned renameWidth; + + /** Commit width, in instructions. Used so rename knows how many + * instructions might have freed registers in the previous cycle. + */ + unsigned commitWidth; + + /** The instruction that rename is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; + + Stats::Scalar<> renameSquashCycles; + Stats::Scalar<> renameIdleCycles; + Stats::Scalar<> renameBlockCycles; + Stats::Scalar<> renameUnblockCycles; + Stats::Scalar<> renameRenamedInsts; + Stats::Scalar<> renameSquashedInsts; + Stats::Scalar<> renameROBFullEvents; + Stats::Scalar<> renameIQFullEvents; + Stats::Scalar<> renameFullRegistersEvents; + Stats::Scalar<> renameRenamedOperands; + Stats::Scalar<> renameRenameLookups; + Stats::Scalar<> renameHBPlaceHolders; + Stats::Scalar<> renameCommittedMaps; + Stats::Scalar<> renameUndoneMaps; + Stats::Scalar<> renameValidUndoneMaps; +}; + +#endif // __CPU_O3_CPU_SIMPLE_RENAME_HH__ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh new file mode 100644 index 000000000..2068b36ab --- /dev/null +++ b/src/cpu/o3/rename_impl.hh @@ -0,0 +1,754 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <list> + +#include "config/full_system.hh" +#include "cpu/o3/rename.hh" + +template <class Impl> +SimpleRename<Impl>::SimpleRename(Params ¶ms) + : iewToRenameDelay(params.iewToRenameDelay), + decodeToRenameDelay(params.decodeToRenameDelay), + commitToRenameDelay(params.commitToRenameDelay), + renameWidth(params.renameWidth), + commitWidth(params.commitWidth), + numInst(0) +{ + _status = Idle; +} + +template <class Impl> +void +SimpleRename<Impl>::regStats() +{ + renameSquashCycles + .name(name() + ".renameSquashCycles") + .desc("Number of cycles rename is squashing") + .prereq(renameSquashCycles); + renameIdleCycles + .name(name() + ".renameIdleCycles") + .desc("Number of cycles rename is idle") + .prereq(renameIdleCycles); + renameBlockCycles + .name(name() + ".renameBlockCycles") + .desc("Number of cycles rename is blocking") + .prereq(renameBlockCycles); + renameUnblockCycles + .name(name() + ".renameUnblockCycles") + .desc("Number of cycles rename is unblocking") + .prereq(renameUnblockCycles); + renameRenamedInsts + .name(name() + ".renameRenamedInsts") + .desc("Number of instructions processed by rename") + .prereq(renameRenamedInsts); + renameSquashedInsts + .name(name() + ".renameSquashedInsts") + .desc("Number of squashed instructions processed by rename") + .prereq(renameSquashedInsts); + renameROBFullEvents + .name(name() + ".renameROBFullEvents") + .desc("Number of times rename has considered the ROB 'full'") + .prereq(renameROBFullEvents); + renameIQFullEvents + .name(name() + ".renameIQFullEvents") + .desc("Number of times rename has considered the IQ 'full'") + .prereq(renameIQFullEvents); + renameFullRegistersEvents + .name(name() + ".renameFullRegisterEvents") + .desc("Number of times there has been no free registers") + .prereq(renameFullRegistersEvents); + renameRenamedOperands + .name(name() + ".renameRenamedOperands") + .desc("Number of destination operands rename has renamed") + .prereq(renameRenamedOperands); + renameRenameLookups + .name(name() + ".renameRenameLookups") + .desc("Number of register rename lookups that rename has made") + .prereq(renameRenameLookups); + renameHBPlaceHolders + .name(name() + ".renameHBPlaceHolders") + .desc("Number of place holders added to the history buffer") + .prereq(renameHBPlaceHolders); + renameCommittedMaps + .name(name() + ".renameCommittedMaps") + .desc("Number of HB maps that are committed") + .prereq(renameCommittedMaps); + renameUndoneMaps + .name(name() + ".renameUndoneMaps") + .desc("Number of HB maps that are undone due to squashing") + .prereq(renameUndoneMaps); + renameValidUndoneMaps + .name(name() + ".renameValidUndoneMaps") + .desc("Number of HB maps that are undone, and are not place holders") + .prereq(renameValidUndoneMaps); +} + +template <class Impl> +void +SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Rename, "Rename: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Rename, "Rename: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from IEW stage. + fromIEW = timeBuffer->getWire(-iewToRenameDelay); + + // Setup wire to read infromation from time buffer, from commit stage. + fromCommit = timeBuffer->getWire(-commitToRenameDelay); + + // Setup wire to write information to previous stages. + toDecode = timeBuffer->getWire(0); +} + +template <class Impl> +void +SimpleRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to write information to future stages. + toIEW = renameQueue->getWire(0); +} + +template <class Impl> +void +SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr) +{ + DPRINTF(Rename, "Rename: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to get information from decode. + fromDecode = decodeQueue->getWire(-decodeToRenameDelay); +} + +template <class Impl> +void +SimpleRename<Impl>::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::setFreeList(FreeList *fl_ptr) +{ + DPRINTF(Rename, "Rename: Setting free list pointer.\n"); + freeList = fl_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::dumpHistory() +{ + typename list<RenameHistory>::iterator buf_it = historyBuffer.begin(); + + while (buf_it != historyBuffer.end()) + { + cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " + "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, + (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); + + buf_it++; + } +} + +template <class Impl> +void +SimpleRename<Impl>::block() +{ + DPRINTF(Rename, "Rename: Blocking.\n"); + // Set status to Blocked. + _status = Blocked; + + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template <class Impl> +inline void +SimpleRename<Impl>::unblock() +{ + DPRINTF(Rename, "Rename: Read instructions out of skid buffer this " + "cycle.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toDecode->renameInfo.stall = true; + } else { + DPRINTF(Rename, "Rename: Done unblocking.\n"); + _status = Running; + } +} + +template <class Impl> +void +SimpleRename<Impl>::doSquash() +{ + typename list<RenameHistory>::iterator hb_it = historyBuffer.begin(); + + InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; + +#if FULL_SYSTEM + assert(!historyBuffer.empty()); +#else + // After a syscall squashes everything, the history buffer may be empty + // but the ROB may still be squashing instructions. + if (historyBuffer.empty()) { + return; + } +#endif // FULL_SYSTEM + + // Go through the most recent instructions, undoing the mappings + // they did and freeing up the registers. + while ((*hb_it).instSeqNum > squashed_seq_num) + { + assert(hb_it != historyBuffer.end()); + + DPRINTF(Rename, "Rename: Removing history entry with sequence " + "number %i.\n", (*hb_it).instSeqNum); + + // If it's not simply a place holder, then add the registers. + if (!(*hb_it).placeHolder) { + // Tell the rename map to set the architected register to the + // previous physical register that it was renamed to. + renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg); + + // Put the renamed physical register back on the free list. + freeList->addReg(hb_it->newPhysReg); + + ++renameValidUndoneMaps; + } + + historyBuffer.erase(hb_it++); + + ++renameUndoneMaps; + } +} + +template <class Impl> +void +SimpleRename<Impl>::squash() +{ + DPRINTF(Rename, "Rename: Squashing instructions.\n"); + // Set the status to Squashing. + _status = Squashing; + + numInst = 0; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } + + doSquash(); +} + +template<class Impl> +void +SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num) +{ + DPRINTF(Rename, "Rename: Removing a committed instruction from the " + "history buffer, until sequence number %lli.\n", inst_seq_num); + typename list<RenameHistory>::iterator hb_it = historyBuffer.end(); + + --hb_it; + + if (hb_it->instSeqNum > inst_seq_num) { + DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure " + "that a syscall happened recently.\n"); + return; + } + + while ((*hb_it).instSeqNum != inst_seq_num) + { + // Make sure we haven't gone off the end of the list. + assert(hb_it != historyBuffer.end()); + + // In theory instructions at the end of the history buffer + // should be older than the instruction being removed, which + // means they will have a lower sequence number. Also the + // instruction being removed from the history really should + // be the last instruction in the list, as it is the instruction + // that was just committed that is being removed. + assert(hb_it->instSeqNum < inst_seq_num); + DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence" + " number %i.\n", + (*hb_it).prevPhysReg, (*hb_it).instSeqNum); + + if (!(*hb_it).placeHolder) { + freeList->addReg((*hb_it).prevPhysReg); + ++renameCommittedMaps; + } + + historyBuffer.erase(hb_it--); + } + + // Finally free up the previous register of the finished instruction + // itself. + if (!(*hb_it).placeHolder) { + freeList->addReg(hb_it->prevPhysReg); + ++renameCommittedMaps; + } + + historyBuffer.erase(hb_it); +} + +template <class Impl> +inline void +SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst) +{ + unsigned num_src_regs = inst->numSrcRegs(); + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. + PhysRegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + + ++renameRenameLookups; + } +} + +template <class Impl> +inline void +SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst) +{ + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_dest_regs = inst->numDestRegs(); + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. + if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + + ++renameHBPlaceHolders; + } else { + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "reg %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", + (*historyBuffer.begin()).instSeqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + + ++renameRenamedOperands; + } + } +} + +template <class Impl> +inline int +SimpleRename<Impl>::calcFreeROBEntries() +{ + return fromCommit->commitInfo.freeROBEntries - + renameWidth * iewToRenameDelay; +} + +template <class Impl> +inline int +SimpleRename<Impl>::calcFreeIQEntries() +{ + return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay; +} + +template<class Impl> +void +SimpleRename<Impl>::tick() +{ + // Rename will need to try to rename as many instructions as it + // has bandwidth, unless it is blocked. + + // Check if _status is BarrierStall. If so, then check if the number + // of free ROB entries is equal to the number of total ROB entries. + // Once equal then wake this stage up. Set status to unblocking maybe. + + if (_status != Blocked && _status != Squashing) { + DPRINTF(Rename, "Rename: Status is not blocked, will attempt to " + "run stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + rename(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + ++renameUnblockCycles; + + if (fromDecode->size > 0) { + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + } + + unblock(); + } + } else if (_status == Blocked) { + ++renameBlockCycles; + + // If stage is blocked and still receiving valid instructions, + // make sure to store them in the skid buffer. + if (fromDecode->size > 0) { + + block(); + + // Continue to tell previous stage to stall. + toDecode->renameInfo.stall = true; + } + + if (!fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall && + calcFreeROBEntries() > 0 && + calcFreeIQEntries() > 0 && + renameMap->numFreeEntries() > 0) { + + // Need to be sure to check all blocking conditions above. + // If they have cleared, then start unblocking. + DPRINTF(Rename, "Rename: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this stage + // is done unblocking. + toDecode->renameInfo.stall = true; + } else { + // Otherwise no conditions have changed. Tell previous + // stage to continue blocking. + toDecode->renameInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + } else if (_status == Squashing) { + ++renameSquashCycles; + + if (fromCommit->commitInfo.squash) { + squash(); + } else if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); + _status = Running; + rename(); + } else { + doSquash(); + } + } + + // Ugly code, revamp all of the tick() functions eventually. + if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { +#if !FULL_SYSTEM + if (!fromCommit->commitInfo.squash) { + removeFromHistory(fromCommit->commitInfo.doneSeqNum); + } +#else + removeFromHistory(fromCommit->commitInfo.doneSeqNum); +#endif + } + +} + +template<class Impl> +void +SimpleRename<Impl>::rename() +{ + // Check if any of the stages ahead of rename are telling rename + // to squash. The squash() function will also take care of fixing up + // the rename map and the free list. + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n"); + squash(); + return; + } + + // Check if time buffer is telling this stage to stall. + if (fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to " + "stall.\n"); + block(); + return; + } + + // Check if the current status is squashing. If so, set its status + // to running and resume execution the next cycle. + if (_status == Squashing) { + DPRINTF(Rename, "Rename: Done squashing.\n"); + _status = Running; + return; + } + + // Check the decode queue to see if instructions are available. + // If there are no available instructions to rename, then do nothing. + // Or, if the stage is currently unblocking, then go ahead and run it. + if (fromDecode->size == 0 && _status != Unblocking) { + DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); + // Should I change status to idle? + return; + } + + //////////////////////////////////// + // Actual rename part. + //////////////////////////////////// + + DynInstPtr inst; + + // If we're unblocking, then we may be in the middle of an instruction + // group. Subtract off numInst to get the proper number of instructions + // left. + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromDecode->size; + + bool block_this_cycle = false; + + // Will have to do a different calculation for the number of free + // entries. Number of free entries recorded on this cycle - + // renameWidth * renameToDecodeDelay + int free_rob_entries = calcFreeROBEntries(); + int free_iq_entries = calcFreeIQEntries(); + int min_iq_rob = min(free_rob_entries, free_iq_entries); + + unsigned to_iew_index = 0; + + // Check if there's any space left. + if (min_iq_rob <= 0) { + DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " + "entries.\n" + "Rename: ROB has %d free entries.\n" + "Rename: IQ has %d free entries.\n", + free_rob_entries, + free_iq_entries); + block(); + // Tell previous stage to stall. + toDecode->renameInfo.stall = true; + + if (free_rob_entries <= 0) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + + return; + } else if (min_iq_rob < insts_available) { + DPRINTF(Rename, "Rename: Will have to block this cycle. Only " + "%i insts can be renamed due to IQ/ROB limits.\n", + min_iq_rob); + + insts_available = min_iq_rob; + + block_this_cycle = true; + + if (free_rob_entries < free_iq_entries) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + } + + while (insts_available > 0) { + DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); + + // Get the next instruction either from the skid buffer or the + // decode queue. + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromDecode->insts[numInst]; + + if (inst->isSquashed()) { + DPRINTF(Rename, "Rename: instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + // Go to the next instruction. + ++numInst; + + ++renameSquashedInsts; + + // Decrement how many instructions are available. + --insts_available; + + continue; + } + + DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", + inst->seqNum, inst->readPC()); + + // If it's a trap instruction, then it needs to wait here within + // rename until the ROB is empty. Needs a way to detect that the + // ROB is empty. Maybe an event? + // Would be nice if it could be avoided putting this into a + // specific stage and instead just put it into the AlphaFullCPU. + // Might not really be feasible though... + // (EXCB, TRAPB) + if (inst->isSerializing()) { + panic("Rename: Serializing instruction encountered.\n"); + DPRINTF(Rename, "Rename: Serializing instruction " + "encountered.\n"); + + // Change status over to BarrierStall so that other stages know + // what this is blocked on. + _status = BarrierStall; + + block_this_cycle = true; + + break; + } + + // Check here to make sure there are enough destination registers + // to rename to. Otherwise block. + if (renameMap->numFreeEntries() < inst->numDestRegs()) + { + DPRINTF(Rename, "Rename: Blocking due to lack of free " + "physical registers to rename to.\n"); + // Need some sort of event based on a register being freed. + + block_this_cycle = true; + + ++renameFullRegistersEvents; + + break; + } + + renameSrcRegs(inst); + + renameDestRegs(inst); + + // Put instruction in rename queue. + toIEW->insts[to_iew_index] = inst; + ++(toIEW->size); + + // Decrease the number of free ROB and IQ entries. + --free_rob_entries; + --free_iq_entries; + + // Increment which instruction we're on. + ++to_iew_index; + ++numInst; + + ++renameRenamedInsts; + + // Decrement how many instructions are available. + --insts_available; + } + + // Check if there's any instructions left that haven't yet been renamed. + // If so then block. + if (block_this_cycle) { + block(); + + toDecode->renameInfo.stall = true; + } else { + // If we had a successful rename and didn't have to exit early, then + // reset numInst so it will refer to the correct instruction on next + // run. + numInst = 0; + } +} diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc new file mode 100644 index 000000000..10963f7de --- /dev/null +++ b/src/cpu/o3/rename_map.cc @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <vector> + +#include "cpu/o3/rename_map.hh" + +using namespace std; + +// Todo: Consider making functions inline. Avoid having things that are +// using the zero register or misc registers from adding on the registers +// to the free list. Possibly remove the direct communication between +// this and the freelist. Considering making inline bool functions that +// determine if the register is a logical int, logical fp, physical int, +// physical fp, etc. + +SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + intZeroReg(_intZeroReg), + floatZeroReg(_floatZeroReg) +{ + DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: " + "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs, + numLogicalFloatRegs, numPhysicalFloatRegs); + + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Create the rename maps, and their scoreboards. + intRenameMap = new RenameEntry[numLogicalIntRegs]; + floatRenameMap = new RenameEntry[numLogicalRegs]; + + // Should combine this into one scoreboard. + intScoreboard.resize(numPhysicalIntRegs); + floatScoreboard.resize(numPhysicalRegs); + miscScoreboard.resize(numPhysicalRegs + numMiscRegs); + + // Initialize the entries in the integer rename map to point to the + // physical registers of the same index, and consider each register + // ready until the first rename occurs. + for (RegIndex index = 0; index < numLogicalIntRegs; ++index) + { + intRenameMap[index].physical_reg = index; + intScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalIntRegs; + index < numPhysicalIntRegs; + ++index) + { + intScoreboard[index] = 0; + } + + int float_reg_idx = numPhysicalIntRegs; + + // Initialize the entries in the floating point rename map to point to + // the physical registers of the same index, and consider each register + // ready until the first rename occurs. + // Although the index refers purely to architected registers, because + // the floating reg indices come after the integer reg indices, they + // may exceed the size of a normal RegIndex (short). + for (PhysRegIndex index = numLogicalIntRegs; + index < numLogicalRegs; ++index) + { + floatRenameMap[index].physical_reg = float_reg_idx++; + } + + for (PhysRegIndex index = numPhysicalIntRegs; + index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) + { + floatScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs; + index < numPhysicalRegs; + ++index) + { + floatScoreboard[index] = 0; + } + + // Initialize the entries in the misc register scoreboard to be ready. + for (PhysRegIndex index = numPhysicalRegs; + index < numPhysicalRegs + numMiscRegs; ++index) + { + miscScoreboard[index] = 1; + } +} + +SimpleRenameMap::~SimpleRenameMap() +{ + // Delete the rename maps as they were allocated with new. + delete [] intRenameMap; + delete [] floatRenameMap; +} + +void +SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) +{ + //Setup the interface to the freelist. + freeList = fl_ptr; +} + + +// Don't allow this stage to fault; force that check to the rename stage. +// Simply ask to rename a logical register and get back a new physical +// register index. +SimpleRenameMap::RenameInfo +SimpleRenameMap::rename(RegIndex arch_reg) +{ + PhysRegIndex renamed_reg; + PhysRegIndex prev_reg; + + if (arch_reg < numLogicalIntRegs) { + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = intRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != intZeroReg) { + // Get a free physical register to rename to. + renamed_reg = freeList->getIntReg(); + + // Update the integer rename map. + intRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); + + // Mark register as not ready. + intScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = intZeroReg; + } + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base offset for floating point registers. +// arch_reg = arch_reg - numLogicalIntRegs; + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = floatRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != floatZeroReg) { + // Get a free floating point register to rename to. + renamed_reg = freeList->getFloatReg(); + + // Update the floating point rename map. + floatRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs && + renamed_reg >= numPhysicalIntRegs); + + // Mark register as not ready. + floatScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = floatZeroReg; + } + } else { + // Subtract off the base offset for miscellaneous registers. + arch_reg = arch_reg - numLogicalRegs; + + // No renaming happens to the misc. registers. They are simply the + // registers that come after all the physical registers; thus + // take the base architected register and add the physical registers + // to it. + renamed_reg = arch_reg + numPhysicalRegs; + + // Set the previous register to the same register; mainly it must be + // known that the prev reg was outside the range of normal registers + // so the free list can avoid adding it. + prev_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs + numMiscRegs); + + miscScoreboard[renamed_reg] = false; + } + + return RenameInfo(renamed_reg, prev_reg); +} + +//Perhaps give this a pair as a return value, of the physical register +//and whether or not it's ready. +PhysRegIndex +SimpleRenameMap::lookup(RegIndex arch_reg) +{ + if (arch_reg < numLogicalIntRegs) { + return intRenameMap[arch_reg].physical_reg; + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base FP offset. +// arch_reg = arch_reg - numLogicalIntRegs; + + return floatRenameMap[arch_reg].physical_reg; + } else { + // Subtract off the misc registers offset. + arch_reg = arch_reg - numLogicalRegs; + + // Misc. regs don't rename, so simply add the base arch reg to + // the number of physical registers. + return numPhysicalRegs + arch_reg; + } +} + +bool +SimpleRenameMap::isReady(PhysRegIndex phys_reg) +{ + if (phys_reg < numPhysicalIntRegs) { + return intScoreboard[phys_reg]; + } else if (phys_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. +// phys_reg = phys_reg - numPhysicalIntRegs; + + return floatScoreboard[phys_reg]; + } else { + // Subtract off the misc registers offset. +// phys_reg = phys_reg - numPhysicalRegs; + + return miscScoreboard[phys_reg]; + } +} + +// In this implementation the miscellaneous registers do not actually rename, +// so this function does not allow you to try to change their mappings. +void +SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) +{ + if (arch_reg < numLogicalIntRegs) { + DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + intRenameMap[arch_reg].physical_reg = renamed_reg; + } else { + assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); + + DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", + (int)arch_reg - numLogicalIntRegs, renamed_reg); + + floatRenameMap[arch_reg].physical_reg = renamed_reg; + } +} + +void +SimpleRenameMap::squash(vector<RegIndex> freed_regs, + vector<UnmapInfo> unmaps) +{ + panic("Not sure this function should be called."); + + // Not sure the rename map should be able to access the free list + // like this. + while (!freed_regs.empty()) { + RegIndex free_register = freed_regs.back(); + + if (free_register < numPhysicalIntRegs) { + freeList->addIntReg(free_register); + } else { + // Subtract off the base FP dependence tag. + free_register = free_register - numPhysicalIntRegs; + freeList->addFloatReg(free_register); + } + + freed_regs.pop_back(); + } + + // Take unmap info and roll back the rename map. +} + +void +SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) +{ + DPRINTF(Rename, "Rename map: Marking register %i as ready.\n", + (int)ready_reg); + + if (ready_reg < numPhysicalIntRegs) { + assert(ready_reg >= 0); + + intScoreboard[ready_reg] = 1; + } else if (ready_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. +// ready_reg = ready_reg - numPhysicalIntRegs; + + floatScoreboard[ready_reg] = 1; + } else { + //Subtract off the misc registers offset. +// ready_reg = ready_reg - numPhysicalRegs; + + miscScoreboard[ready_reg] = 1; + } +} + +int +SimpleRenameMap::numFreeEntries() +{ + int free_int_regs = freeList->numFreeIntRegs(); + int free_float_regs = freeList->numFreeFloatRegs(); + + if (free_int_regs < free_float_regs) { + return free_int_regs; + } else { + return free_float_regs; + } +} diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh new file mode 100644 index 000000000..57be4a64a --- /dev/null +++ b/src/cpu/o3/rename_map.hh @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Create destructor. +// Have it so that there's a more meaningful name given to the variable +// that marks the beginning of the FP registers. + +#ifndef __CPU_O3_CPU_RENAME_MAP_HH__ +#define __CPU_O3_CPU_RENAME_MAP_HH__ + +#include <iostream> +#include <utility> +#include <vector> + +#include "cpu/o3/free_list.hh" +//For RegIndex +#include "arch/isa_traits.hh" + +class SimpleRenameMap +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + /** + * Pair of a logical register and a physical register. Tells the + * previous mapping of a logical register to a physical register. + * Used to roll back the rename map to a previous state. + */ + typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo; + + /** + * Pair of a physical register and a physical register. Used to + * return the physical register that a logical register has been + * renamed to, and the previous physical register that the same + * logical register was previously mapped to. + */ + typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo; + + public: + //Constructor + SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg); + + /** Destructor. */ + ~SimpleRenameMap(); + + void setFreeList(SimpleFreeList *fl_ptr); + + //Tell rename map to get a free physical register for a given + //architected register. Not sure it should have a return value, + //but perhaps it should have some sort of fault in case there are + //no free registers. + RenameInfo rename(RegIndex arch_reg); + + PhysRegIndex lookup(RegIndex phys_reg); + + bool isReady(PhysRegIndex arch_reg); + + /** + * Marks the given register as ready, meaning that its value has been + * calculated and written to the register file. + * @param ready_reg The index of the physical register that is now ready. + */ + void markAsReady(PhysRegIndex ready_reg); + + void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); + + void squash(std::vector<RegIndex> freed_regs, + std::vector<UnmapInfo> unmaps); + + int numFreeEntries(); + + private: + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The integer zero register. This implementation assumes it is always + * zero and never can be anything else. + */ + RegIndex intZeroReg; + + /** The floating point zero register. This implementation assumes it is + * always zero and never can be anything else. + */ + RegIndex floatZeroReg; + + class RenameEntry + { + public: + PhysRegIndex physical_reg; + bool valid; + + RenameEntry() + : physical_reg(0), valid(false) + { } + }; + + /** Integer rename map. */ + RenameEntry *intRenameMap; + + /** Floating point rename map. */ + RenameEntry *floatRenameMap; + + /** Free list interface. */ + SimpleFreeList *freeList; + + // Might want to make all these scoreboards into one large scoreboard. + + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + std::vector<bool> intScoreboard; + + /** Scoreboard of physical floating registers, saying whether or not they + * are ready. + */ + std::vector<bool> floatScoreboard; + + /** Scoreboard of miscellaneous registers, saying whether or not they + * are ready. + */ + std::vector<bool> miscScoreboard; +}; + +#endif //__CPU_O3_CPU_RENAME_MAP_HH__ diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc new file mode 100644 index 000000000..c10f782fd --- /dev/null +++ b/src/cpu/o3/rob.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/rob_impl.hh" + +// Force instantiation of InstructionQueue. +template class ROB<AlphaSimpleImpl>; diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh new file mode 100644 index 000000000..1185564ad --- /dev/null +++ b/src/cpu/o3/rob.hh @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Probably add in support for scheduling events (more than one as +// well) on the case of the ROB being empty or full. Considering tracking +// free entries instead of insts in ROB. Differentiate between squashing +// all instructions after the instruction, and all instructions after *and* +// including that instruction. + +#ifndef __CPU_O3_CPU_ROB_HH__ +#define __CPU_O3_CPU_ROB_HH__ + +#include <utility> +#include <vector> + +/** + * ROB class. Uses the instruction list that exists within the CPU to + * represent the ROB. This class doesn't contain that list, but instead + * a pointer to the CPU to get access to the list. The ROB, in this first + * implementation, is largely what drives squashing. + */ +template <class Impl> +class ROB +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + + typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo_t; + typedef typename list<DynInstPtr>::iterator InstIt_t; + + public: + /** ROB constructor. + * @param _numEntries Number of entries in ROB. + * @param _squashWidth Number of instructions that can be squashed in a + * single cycle. + */ + ROB(unsigned _numEntries, unsigned _squashWidth); + + /** Function to set the CPU pointer, necessary due to which object the ROB + * is created within. + * @param cpu_ptr Pointer to the implementation specific full CPU object. + */ + void setCPU(FullCPU *cpu_ptr); + + /** Function to insert an instruction into the ROB. The parameter inst is + * not truly required, but is useful for checking correctness. Note + * that whatever calls this function must ensure that there is enough + * space within the ROB for the new instruction. + * @param inst The instruction being inserted into the ROB. + * @todo Remove the parameter once correctness is ensured. + */ + void insertInst(DynInstPtr &inst); + + /** Returns pointer to the head instruction within the ROB. There is + * no guarantee as to the return value if the ROB is empty. + * @retval Pointer to the DynInst that is at the head of the ROB. + */ + DynInstPtr readHeadInst() { return cpu->instList.front(); } + + DynInstPtr readTailInst() { return (*tail); } + + void retireHead(); + + bool isHeadReady(); + + unsigned numFreeEntries(); + + bool isFull() + { return numInstsInROB == numEntries; } + + bool isEmpty() + { return numInstsInROB == 0; } + + void doSquash(); + + void squash(InstSeqNum squash_num); + + uint64_t readHeadPC(); + + uint64_t readHeadNextPC(); + + InstSeqNum readHeadSeqNum(); + + uint64_t readTailPC(); + + InstSeqNum readTailSeqNum(); + + /** Checks if the ROB is still in the process of squashing instructions. + * @retval Whether or not the ROB is done squashing. + */ + bool isDoneSquashing() const { return doneSquashing; } + + /** This is more of a debugging function than anything. Use + * numInstsInROB to get the instructions in the ROB unless you are + * double checking that variable. + */ + int countInsts(); + + private: + + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Number of instructions in the ROB. */ + unsigned numEntries; + + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + + /** Iterator pointing to the instruction which is the last instruction + * in the ROB. This may at times be invalid (ie when the ROB is empty), + * however it should never be incorrect. + */ + InstIt_t tail; + + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. + */ + InstIt_t squashIt; + + /** Number of instructions in the ROB. */ + int numInstsInROB; + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Is the ROB done squashing. */ + bool doneSquashing; +}; + +#endif //__CPU_O3_CPU_ROB_HH__ diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh new file mode 100644 index 000000000..e7a5671d9 --- /dev/null +++ b/src/cpu/o3/rob_impl.hh @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ROB_IMPL_HH__ +#define __CPU_O3_CPU_ROB_IMPL_HH__ + +#include "config/full_system.hh" +#include "cpu/o3/rob.hh" + +template <class Impl> +ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth) + : numEntries(_numEntries), + squashWidth(_squashWidth), + numInstsInROB(0), + squashedSeqNum(0) +{ + doneSquashing = true; +} + +template <class Impl> +void +ROB<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + // Set the tail to the beginning of the CPU instruction list so that + // upon the first instruction being inserted into the ROB, the tail + // iterator can simply be incremented. + tail = cpu->instList.begin(); + + // Set the squash iterator to the end of the instruction list. + squashIt = cpu->instList.end(); +} + +template <class Impl> +int +ROB<Impl>::countInsts() +{ + // Start at 1; if the tail matches cpu->instList.begin(), then there is + // one inst in the ROB. + int return_val = 1; + + // There are quite a few special cases. Do not use this function other + // than for debugging purposes. + if (cpu->instList.begin() == cpu->instList.end()) { + // In this case there are no instructions in the list. The ROB + // must be empty. + return 0; + } else if (tail == cpu->instList.end()) { + // In this case, the tail is not yet pointing to anything valid. + // The ROB must be empty. + return 0; + } + + // Iterate through the ROB from the head to the tail, counting the + // entries. + for (InstIt_t i = cpu->instList.begin(); i != tail; ++i) + { + assert(i != cpu->instList.end()); + ++return_val; + } + + return return_val; + + // Because the head won't be tracked properly until the ROB gets the + // first instruction, and any time that the ROB is empty and has not + // yet gotten the instruction, this function doesn't work. +// return numInstsInROB; +} + +template <class Impl> +void +ROB<Impl>::insertInst(DynInstPtr &inst) +{ + // Make sure we have the right number of instructions. + assert(numInstsInROB == countInsts()); + // Make sure the instruction is valid. + assert(inst); + + DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC()); + + // If the ROB is full then exit. + assert(numInstsInROB != numEntries); + + ++numInstsInROB; + + // Increment the tail iterator, moving it one instruction back. + // There is a special case if the ROB was empty prior to this insertion, + // in which case the tail will be pointing at instList.end(). If that + // happens, then reset the tail to the beginning of the list. + if (tail != cpu->instList.end()) { + ++tail; + } else { + tail = cpu->instList.begin(); + } + + // Make sure the tail iterator is actually pointing at the instruction + // added. + assert((*tail) == inst); + + DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB); + +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. +template <class Impl> +void +ROB<Impl>::retireHead() +{ + assert(numInstsInROB == countInsts()); + assert(numInstsInROB > 0); + + // Get the head ROB instruction. + DynInstPtr head_inst = cpu->instList.front(); + + // Make certain this can retire. + assert(head_inst->readyToCommit()); + + DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, " + "instruction PC %#x, seq num %i\n", head_inst->readPC(), + head_inst->seqNum); + + // Keep track of how many instructions are in the ROB. + --numInstsInROB; + + // Tell CPU to remove the instruction from the list of instructions. + // A special case is needed if the instruction being retired is the + // only instruction in the ROB; otherwise the tail iterator will become + // invalidated. + cpu->removeFrontInst(head_inst); + + if (numInstsInROB == 0) { + tail = cpu->instList.end(); + } +} + +template <class Impl> +bool +ROB<Impl>::isHeadReady() +{ + if (numInstsInROB != 0) { + return cpu->instList.front()->readyToCommit(); + } + + return false; +} + +template <class Impl> +unsigned +ROB<Impl>::numFreeEntries() +{ + assert(numInstsInROB == countInsts()); + + return numEntries - numInstsInROB; +} + +template <class Impl> +void +ROB<Impl>::doSquash() +{ + DPRINTF(ROB, "ROB: Squashing instructions.\n"); + + assert(squashIt != cpu->instList.end()); + + for (int numSquashed = 0; + numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum; + ++numSquashed) + { + // Ensure that the instruction is younger. + assert((*squashIt)->seqNum > squashedSeqNum); + + DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n", + (*squashIt)->readPC(), (*squashIt)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*squashIt)->setSquashed(); + + (*squashIt)->setCanCommit(); + + // Special case for when squashing due to a syscall. It's possible + // that the squash happened after the head instruction was already + // committed, meaning that (*squashIt)->seqNum != squashedSeqNum + // will never be false. Normally the squash would never be able + // to go past the head of the ROB; in this case it might, so it + // must be handled otherwise it will segfault. +#if !FULL_SYSTEM + if (squashIt == cpu->instList.begin()) { + DPRINTF(ROB, "ROB: Reached head of instruction list while " + "squashing.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + + return; + } +#endif + + // Move the tail iterator to the next instruction. + squashIt--; + } + + + // Check if ROB is done squashing. + if ((*squashIt)->seqNum == squashedSeqNum) { + DPRINTF(ROB, "ROB: Done squashing instructions.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + } +} + +template <class Impl> +void +ROB<Impl>::squash(InstSeqNum squash_num) +{ + DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n"); + doneSquashing = false; + + squashedSeqNum = squash_num; + + assert(tail != cpu->instList.end()); + + squashIt = tail; + + doSquash(); +} + +template <class Impl> +uint64_t +ROB<Impl>::readHeadPC() +{ + assert(numInstsInROB == countInsts()); + + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->readPC(); +} + +template <class Impl> +uint64_t +ROB<Impl>::readHeadNextPC() +{ + assert(numInstsInROB == countInsts()); + + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->readNextPC(); +} + +template <class Impl> +InstSeqNum +ROB<Impl>::readHeadSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->seqNum; +} + +template <class Impl> +uint64_t +ROB<Impl>::readTailPC() +{ + assert(numInstsInROB == countInsts()); + + assert(tail != cpu->instList.end()); + + return (*tail)->readPC(); +} + +template <class Impl> +InstSeqNum +ROB<Impl>::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + return (*tail)->seqNum; +} + +#endif // __CPU_O3_CPU_ROB_IMPL_HH__ diff --git a/src/cpu/o3/sat_counter.cc b/src/cpu/o3/sat_counter.cc new file mode 100644 index 000000000..d20fff650 --- /dev/null +++ b/src/cpu/o3/sat_counter.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/misc.hh" +#include "cpu/o3/sat_counter.hh" + +SatCounter::SatCounter() + : maxVal(0), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +SatCounter::setBits(unsigned bits) +{ + maxVal = (1 << bits) - 1; +} + +void +SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/o3/sat_counter.hh new file mode 100644 index 000000000..b7cfe6423 --- /dev/null +++ b/src/cpu/o3/sat_counter.hh @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_SAT_COUNTER_HH__ +#define __CPU_O3_CPU_SAT_COUNTER_HH__ + +#include "sim/host.hh" + +/** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ +class SatCounter +{ + public: + /** + * Constructor for the counter. + */ + SatCounter(); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Sets the number of bits. + */ + void setBits(unsigned bits); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + const uint8_t read() const + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; +}; + +#endif // __CPU_O3_CPU_SAT_COUNTER_HH__ diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc new file mode 100644 index 000000000..11023f4a8 --- /dev/null +++ b/src/cpu/o3/store_set.cc @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "cpu/o3/store_set.hh" + +StoreSet::StoreSet(int _SSIT_size, int _LFST_size) + : SSIT_size(_SSIT_size), LFST_size(_LFST_size) +{ + DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", + SSIT_size, LFST_size); + + SSIT = new SSID[SSIT_size]; + + validSSIT.resize(SSIT_size); + + for (int i = 0; i < SSIT_size; ++i) + validSSIT[i] = false; + + LFST = new InstSeqNum[LFST_size]; + + validLFST.resize(LFST_size); + + SSCounters = new int[LFST_size]; + + for (int i = 0; i < LFST_size; ++i) + { + validLFST[i] = false; + SSCounters[i] = 0; + } + + index_mask = SSIT_size - 1; + + offset_bits = 2; +} + +void +StoreSet::violation(Addr store_PC, Addr load_PC) +{ + int load_index = calcIndex(load_PC); + int store_index = calcIndex(store_PC); + + assert(load_index < SSIT_size && store_index < SSIT_size); + + bool valid_load_SSID = validSSIT[load_index]; + bool valid_store_SSID = validSSIT[store_index]; + + if (!valid_load_SSID && !valid_store_SSID) { + // Calculate a new SSID here. + SSID new_set = calcSSID(load_PC); + + validSSIT[load_index] = true; + + SSIT[load_index] = new_set; + + validSSIT[store_index] = true; + + SSIT[store_index] = new_set; + + assert(new_set < LFST_size); + + SSCounters[new_set]++; + + + DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid " + "storeset, creating a new one: %i for load %#x, store %#x\n", + new_set, load_PC, store_PC); + } else if (valid_load_SSID && !valid_store_SSID) { + SSID load_SSID = SSIT[load_index]; + + validSSIT[store_index] = true; + + SSIT[store_index] = load_SSID; + + assert(load_SSID < LFST_size); + + SSCounters[load_SSID]++; + + DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding " + "store to that set: %i for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else if (!valid_load_SSID && valid_store_SSID) { + SSID store_SSID = SSIT[store_index]; + + validSSIT[load_index] = true; + + SSIT[load_index] = store_SSID; + + // Because we are having a load point to an already existing set, + // the size of the store set is not incremented. + + DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for " + "load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } else { + SSID load_SSID = SSIT[load_index]; + SSID store_SSID = SSIT[store_index]; + + assert(load_SSID < LFST_size && store_SSID < LFST_size); + + int load_SS_size = SSCounters[load_SSID]; + int store_SS_size = SSCounters[store_SSID]; + + // If the load has the bigger store set, then assign the store + // to the same store set as the load. Otherwise vice-versa. + if (load_SS_size > store_SS_size) { + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + SSCounters[store_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; " + "for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else { + SSIT[load_index] = store_SSID; + + SSCounters[store_SSID]++; + SSCounters[load_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; " + "for load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } + } +} + +void +StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) +{ + // Does nothing. + return; +} + +void +StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) +{ + int index = calcIndex(store_PC); + + int store_SSID; + + assert(index < SSIT_size); + + if (!validSSIT[index]) { + // Do nothing if there's no valid entry. + return; + } else { + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // Update the last store that was fetched with the current one. + LFST[store_SSID] = store_seq_num; + + validLFST[store_SSID] = 1; + + DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n", + store_PC, store_SSID); + } +} + +InstSeqNum +StoreSet::checkInst(Addr PC) +{ + int index = calcIndex(PC); + + int inst_SSID; + + assert(index < SSIT_size); + + if (!validSSIT[index]) { + DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n", + PC, index); + + // Return 0 if there's no valid entry. + return 0; + } else { + inst_SSID = SSIT[index]; + + assert(inst_SSID < LFST_size); + + if (!validLFST[inst_SSID]) { + + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no " + "dependency\n", PC, index, inst_SSID); + + return 0; + } else { + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST " + "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]); + + return LFST[inst_SSID]; + } + } +} + +void +StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) +{ + // This only is updated upon a store being issued. + if (!is_store) { + return; + } + + int index = calcIndex(issued_PC); + + int store_SSID; + + assert(index < SSIT_size); + + // Make sure the SSIT still has a valid entry for the issued store. + if (!validSSIT[index]) { + return; + } + + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // If the last fetched store in the store set refers to the store that + // was just issued, then invalidate the entry. + if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n"); + validLFST[store_SSID] = false; + } +} + +void +StoreSet::squash(InstSeqNum squashed_num) +{ + // Not really sure how to do this well. + // Generally this is small enough that it should be okay; short circuit + // evaluation should take care of invalid entries. + + DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n", + squashed_num); + + for (int i = 0; i < LFST_size; ++i) { + if (validLFST[i] && LFST[i] < squashed_num) { + validLFST[i] = false; + } + } +} + +void +StoreSet::clear() +{ + for (int i = 0; i < SSIT_size; ++i) { + validSSIT[i] = false; + } + + for (int i = 0; i < LFST_size; ++i) { + validLFST[i] = false; + } +} + diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh new file mode 100644 index 000000000..5a885d838 --- /dev/null +++ b/src/cpu/o3/store_set.hh @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_STORE_SET_HH__ +#define __CPU_O3_CPU_STORE_SET_HH__ + +#include <vector> + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +class StoreSet +{ + public: + typedef unsigned SSID; + + public: + StoreSet(int SSIT_size, int LFST_size); + + void violation(Addr store_PC, Addr load_PC); + + void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + + void insertStore(Addr store_PC, InstSeqNum store_seq_num); + + InstSeqNum checkInst(Addr PC); + + void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + + void squash(InstSeqNum squashed_num); + + void clear(); + + private: + inline int calcIndex(Addr PC) + { return (PC >> offset_bits) & index_mask; } + + inline SSID calcSSID(Addr PC) + { return ((PC ^ (PC >> 10)) % LFST_size); } + + SSID *SSIT; + + std::vector<bool> validSSIT; + + InstSeqNum *LFST; + + std::vector<bool> validLFST; + + int *SSCounters; + + int SSIT_size; + + int LFST_size; + + int index_mask; + + // HACK: Hardcoded for now. + int offset_bits; +}; + +#endif // __CPU_O3_CPU_STORE_SET_HH__ diff --git a/src/cpu/o3/tournament_pred.cc b/src/cpu/o3/tournament_pred.cc new file mode 100644 index 000000000..3fb580510 --- /dev/null +++ b/src/cpu/o3/tournament_pred.cc @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/tournament_pred.hh" + +TournamentBP::TournamentBP(unsigned _local_predictor_size, + unsigned _local_ctr_bits, + unsigned _local_history_table_size, + unsigned _local_history_bits, + unsigned _global_predictor_size, + unsigned _global_ctr_bits, + unsigned _global_history_bits, + unsigned _choice_predictor_size, + unsigned _choice_ctr_bits, + unsigned _instShiftAmt) + : localPredictorSize(_local_predictor_size), + localCtrBits(_local_ctr_bits), + localHistoryTableSize(_local_history_table_size), + localHistoryBits(_local_history_bits), + globalPredictorSize(_global_predictor_size), + globalCtrBits(_global_ctr_bits), + globalHistoryBits(_global_history_bits), + choicePredictorSize(_global_predictor_size), + choiceCtrBits(_choice_ctr_bits), + instShiftAmt(_instShiftAmt) +{ + //Should do checks here to make sure sizes are correct (powers of 2) + + //Setup the array of counters for the local predictor + localCtrs = new SatCounter[localPredictorSize]; + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(localCtrBits); + + //Setup the history table for the local table + localHistoryTable = new unsigned[localHistoryTableSize]; + + for (int i = 0; i < localHistoryTableSize; ++i) + localHistoryTable[i] = 0; + + // Setup the local history mask + localHistoryMask = (1 << localHistoryBits) - 1; + + //Setup the array of counters for the global predictor + globalCtrs = new SatCounter[globalPredictorSize]; + + for (int i = 0; i < globalPredictorSize; ++i) + globalCtrs[i].setBits(globalCtrBits); + + //Clear the global history + globalHistory = 0; + // Setup the global history mask + globalHistoryMask = (1 << globalHistoryBits) - 1; + + //Setup the array of counters for the choice predictor + choiceCtrs = new SatCounter[choicePredictorSize]; + + for (int i = 0; i < choicePredictorSize; ++i) + choiceCtrs[i].setBits(choiceCtrBits); + + threshold = (1 << (localCtrBits - 1)) - 1; + threshold = threshold / 2; +} + +inline +unsigned +TournamentBP::calcLocHistIdx(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1); +} + +inline +void +TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +{ + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +{ + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1); +} + +bool +TournamentBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + + uint8_t global_prediction; + uint8_t choice_prediction; + + //Lookup in the local predictor to get its branch prediction + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = localHistoryTable[local_history_idx] + & localHistoryMask; + local_prediction = localCtrs[local_predictor_idx].read(); + + //Lookup in the global predictor to get its branch prediction + global_prediction = globalCtrs[globalHistory].read(); + + //Lookup in the choice predictor to see which one to use + choice_prediction = choiceCtrs[globalHistory].read(); + + //@todo Put a threshold value in for the three predictors that can + // be set through the constructor (so this isn't hard coded). + //Also should put some of this code into functions. + if (choice_prediction > threshold) { + if (global_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); + + return false; + } + } else { + if (local_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); + + return false; + } + } +} + +// Update the branch predictor if it predicted a branch wrong. +void +TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +{ + + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + bool local_pred_taken; + + uint8_t global_prediction; + bool global_pred_taken; + + // Load the correct global history into the register. + globalHistory = correct_gh; + + // Get the local predictor's current prediction, remove the incorrect + // update, and update the local predictor + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = localHistoryTable[local_history_idx]; + local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; + + local_prediction = localCtrs[local_predictor_idx].read(); + local_pred_taken = local_prediction > threshold; + + //Get the global predictor's current prediction, and update the + //global predictor + global_prediction = globalCtrs[globalHistory].read(); + global_pred_taken = global_prediction > threshold; + + //Update the choice predictor to tell it which one was correct + if (local_pred_taken != global_pred_taken) { + //If the local prediction matches the actual outcome, decerement + //the counter. Otherwise increment the counter. + if (local_pred_taken == taken) { + choiceCtrs[globalHistory].decrement(); + } else { + choiceCtrs[globalHistory].increment(); + } + } + + if (taken) { + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + + localCtrs[local_predictor_idx].increment(); + globalCtrs[globalHistory].increment(); + + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] |= 1; + } + else { + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + + localCtrs[local_predictor_idx].decrement(); + globalCtrs[globalHistory].decrement(); + + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] &= ~1; + } +} diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh new file mode 100644 index 000000000..cb93c2f67 --- /dev/null +++ b/src/cpu/o3/tournament_pred.hh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_TOURNAMENT_PRED_HH__ +#define __CPU_O3_CPU_TOURNAMENT_PRED_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "cpu/o3/sat_counter.hh" + +class TournamentBP +{ + public: + /** + * Default branch predictor constructor. + */ + TournamentBP(unsigned local_predictor_size, + unsigned local_ctr_bits, + unsigned local_history_table_size, + unsigned local_history_bits, + unsigned global_predictor_size, + unsigned global_history_bits, + unsigned global_ctr_bits, + unsigned choice_predictor_size, + unsigned choice_ctr_bits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, unsigned global_history, bool taken); + + inline unsigned readGlobalHist() { return globalHistory; } + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned calcLocHistIdx(Addr &branch_addr); + + inline void updateHistoriesTaken(unsigned local_history_idx); + + inline void updateHistoriesNotTaken(unsigned local_history_idx); + + /** Local counters. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Array of local history table entries. */ + unsigned *localHistoryTable; + + /** Size of the local history table. */ + unsigned localHistoryTableSize; + + /** Number of bits for each entry of the local history table. + * @todo Doesn't this come from the size of the local predictor? + */ + unsigned localHistoryBits; + + /** Mask to get the proper local history. */ + unsigned localHistoryMask; + + + /** Array of counters that make up the global predictor. */ + SatCounter *globalCtrs; + + /** Size of the global predictor. */ + unsigned globalPredictorSize; + + /** Number of bits of the global predictor's counters. */ + unsigned globalCtrBits; + + /** Global history register. */ + unsigned globalHistory; + + /** Number of bits for the global history. */ + unsigned globalHistoryBits; + + /** Mask to get the proper global history. */ + unsigned globalHistoryMask; + + + /** Array of counters that make up the choice predictor. */ + SatCounter *choiceCtrs; + + /** Size of the choice predictor (identical to the global predictor). */ + unsigned choicePredictorSize; + + /** Number of bits of the choice predictor's counters. */ + unsigned choiceCtrBits; + + /** Number of bits to shift the instruction over to get rid of the word + * offset. + */ + unsigned instShiftAmt; + + /** Threshold for the counter value; above the threshold is taken, + * equal to or below the threshold is not taken. + */ + unsigned threshold; +}; + +#endif // __CPU_O3_CPU_TOURNAMENT_PRED_HH__ diff --git a/src/cpu/op_class.cc b/src/cpu/op_class.cc new file mode 100644 index 000000000..00136ded5 --- /dev/null +++ b/src/cpu/op_class.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/op_class.hh" + +/** OpClass enum -> description string */ +const char * +opClassStrings[Num_OpClasses] = +{ + "(null)", + "IntAlu", + "IntMult", + "IntDiv", + "FloatAdd", + "FloatCmp", + "FloatCvt", + "FloatMult", + "FloatDiv", + "FloatSqrt", + "MemRead", + "MemWrite", + "IprAccess", + "InstPrefetch" +}; + diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh new file mode 100644 index 000000000..cdb40a0fb --- /dev/null +++ b/src/cpu/op_class.hh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU__OP_CLASS_HH__ +#define __CPU__OP_CLASS_HH__ + +/** + * @file + * Definition of operation classes. + */ + +/** + * Instruction operation classes. These classes are used for + * assigning instructions to functional units. + */ +enum OpClass { + No_OpClass = 0, ///< Instruction does not use a functional unit + IntAluOp, ///< Integer ALU operaton (add/sub/logical) + IntMultOp, ///< Integer multiply + IntDivOp, ///< Integer divide + FloatAddOp, ///< Floating point add/subtract + FloatCmpOp, ///< Floating point comparison + FloatCvtOp, ///< Floating point<->integer conversion + FloatMultOp, ///< Floating point multiply + FloatDivOp, ///< Floating point divide + FloatSqrtOp, ///< Floating point square root + MemReadOp, ///< Memory read port + MemWriteOp, ///< Memory write port + IprAccessOp, ///< Internal Processor Register read/write port + InstPrefetchOp, ///< Instruction prefetch port (on I-cache) + Num_OpClasses ///< Total number of operation classes +}; + +/** + * Array mapping OpClass enum values to strings. Defined in op_class.cc. + */ +extern const char *opClassStrings[]; + +#endif // __CPU__OP_CLASS_HH__ diff --git a/src/cpu/ozone/cpu.cc b/src/cpu/ozone/cpu.cc new file mode 100644 index 000000000..cbeca9d3b --- /dev/null +++ b/src/cpu/ozone/cpu.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ooo_cpu/ooo_cpu_impl.hh" +#include "cpu/ooo_cpu/ooo_dyn_inst.hh" +#include "cpu/ooo_cpu/ooo_impl.hh" + +template class OoOCPU<OoOImpl>; diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh new file mode 100644 index 000000000..fa849bb09 --- /dev/null +++ b/src/cpu/ozone/cpu.hh @@ -0,0 +1,638 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OOO_CPU_OOO_CPU_HH__ +#define __CPU_OOO_CPU_OOO_CPU_HH__ + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "encumbered/cpu/full/fu_pool.hh" +#include "cpu/ooo_cpu/ea_list.hh" +#include "cpu/pc_event.hh" +#include "cpu/static_inst.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +// forward declarations +#if FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class PhysicalMemory; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM + +class Checkpoint; +class MemInterface; + +namespace Trace { + class InstRecord; +} + +/** + * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with + * simple out-of-order capabilities added to it. It is still a 1 CPI machine + * (?), but is capable of handling cache misses. Basically it models having + * a ROB/IQ by only allowing a certain amount of instructions to execute while + * the cache miss is outstanding. + */ + +template <class Impl> +class OoOCPU : public BaseCPU +{ + private: + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + // main simulation loop (one cycle) + void tick(); + + private: + struct TickEvent : public Event + { + OoOCPU *cpu; + int width; + + TickEvent(OoOCPU *c, int w); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + private: + Trace::InstRecord *traceData; + + template<typename T> + void trace_data(T data); + + public: + // + enum Status { + Running, + Idle, + IcacheMiss, + IcacheMissComplete, + DcacheMissStall, + SwitchedOut + }; + + private: + Status _status; + + public: + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + + struct Params : public BaseCPU::Params + { + MemInterface *icache_interface; + MemInterface *dcache_interface; + int width; +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + FunctionalMemory *mem; +#else + Process *process; +#endif + int issueWidth; + }; + + OoOCPU(Params *params); + + virtual ~OoOCPU(); + + void init(); + + private: + void copyFromXC(); + + public: + // execution context + ExecContext *xc; + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + +#if FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; +#endif + + // L1 instruction cache + MemInterface *icacheInterface; + + // L1 data cache + MemInterface *dcacheInterface; + + FuncUnitPool *fuPool; + + // Refcounted pointer to the one memory request. + MemReqPtr cacheMemReq; + + class ICacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + + public: + ICacheCompletionEvent(OoOCPU *_cpu); + + virtual void process(); + virtual const char *description(); + }; + + // Will need to create a cache completion event upon any memory miss. + ICacheCompletionEvent iCacheCompletionEvent; + + class DCacheCompletionEvent; + + typedef typename + std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt; + + class DCacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + DynInstPtr inst; + DCacheCompEventIt dcceIt; + + public: + DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, + DCacheCompEventIt &_dcceIt); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + protected: + std::list<DCacheCompletionEvent> dCacheCompList; + DCacheCompEventIt dcceIt; + + private: + Status status() const { return _status; } + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated memory references + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; + + // number of cycles stalled for I-cache misses + Stats::Scalar<> icacheStallCycles; + Counter lastIcacheStall; + + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + void processICacheCompletion(); + + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + bool validInstAddr(Addr addr) { return true; } + bool validDataAddr(Addr addr) { return true; } + int getInstAsid() { return xc->regs.instAsid(); } + int getDataAsid() { return xc->regs.dataAsid(); } + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + bool validInstAddr(Addr addr) + { return xc->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return xc->validDataAddr(addr); } + + int getInstAsid() { return xc->asid; } + int getDataAsid() { return xc->asid; } + + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + template <class T> + Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst); + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + private: + bool executeInst(DynInstPtr &inst); + + void renameInst(DynInstPtr &inst); + + void addInst(DynInstPtr &inst); + + void commitHeadInst(); + + bool getOneInst(); + + Fault fetchCacheLine(); + + InstSeqNum getAndIncrementInstSeq(); + + bool ambigMemAddr; + + private: + InstSeqNum globalSeqNum; + + DynInstPtr renameTable[TheISA::TotalNumRegs]; + DynInstPtr commitTable[TheISA::TotalNumRegs]; + + // Might need a table of the shadow registers as well. +#if FULL_SYSTEM + DynInstPtr palShadowTable[TheISA::NumIntRegs]; +#endif + + public: + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + // In the OoO case these shouldn't read from the XC but rather from the + // rename table of DynInsts. Also these likely shouldn't be called very + // often, other than when adding things into the xc during say a syscall. + + uint64_t readIntReg(StaticInst *si, int idx) + { + return xc->readIntReg(si->srcRegIdx(idx)); + } + + FloatReg readFloatReg(StaticInst *si, int idx, width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegBits(reg_idx); + } + + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + xc->setIntReg(si->destRegIdx(idx), val); + } + + void setFloatReg(StaticInst *si, int idx, FloatReg val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatReg(reg_idx, val, width); + } + + void setFloatReg(StaticInst *si, int idx, FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatReg(reg_idx, val); + } + + void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() { return PC; } + void setNextPC(Addr val) { nextPC = val; } + + private: + Addr PC; + Addr nextPC; + + unsigned issueWidth; + + bool fetchRedirExcp; + bool fetchRedirBranch; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + unsigned cacheBlkSize; + + Addr cacheBlkPC; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + protected: + bool cacheBlkValid; + + private: + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + unsigned instSize; + + // ROB tracking stuff. + DynInstPtr robHeadPtr; + DynInstPtr robTailPtr; + unsigned robSize; + unsigned robInsts; + + // List of outstanding EA instructions. + protected: + EAList eaList; + + public: + void branchToTarget(Addr val) + { + if (!fetchRedirExcp) { + fetchRedirBranch = true; + PC = val; + } + } + + // ISA stuff: + uint64_t readUniq() { return xc->readUniq(); } + void setUniq(uint64_t val) { xc->setUniq(val); } + + uint64_t readFpcr() { return xc->readFpcr(); } + void setFpcr(uint64_t val) { xc->setFpcr(val); } + +#if FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } + Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } + Fault hwrei() { return xc->hwrei(); } + int readIntrFlag() { return xc->readIntrFlag(); } + void setIntrFlag(int val) { xc->setIntrFlag(val); } + bool inPalMode() { return xc->inPalMode(); } + void trap(Fault fault) { fault->invoke(xc); } + bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } +#else + void syscall() { xc->syscall(); } +#endif + + ExecContext *xcBase() { return xc; } +}; + + +// precise architected memory state accessor macros +template <class Impl> +template <class T> +Fault +OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) +{ + MemReqPtr readReq = new MemReq(); + readReq->xc = xc; + readReq->asid = 0; + readReq->data = new uint8_t[64]; + + readReq->reset(addr, sizeof(T), flags); + + // translate to physical address - This might be an ISA impl call + Fault fault = translateDataReadReq(readReq); + + // do functional access + if (fault == NoFault) + fault = xc->mem->read(readReq, data); +#if 0 + if (traceData) { + traceData->setAddr(addr); + if (fault == NoFault) + traceData->setData(data); + } +#endif + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + readReq->cmd = Read; + readReq->completionEvent = NULL; + readReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(readReq); + + if (dcacheInterface->doEvents()) { + readReq->completionEvent = new DCacheCompletionEvent(this, inst, + dcceIt); + } + } + + if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); + + return fault; +} + +template <class Impl> +template <class T> +Fault +OoOCPU<Impl>::write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst) +{ + MemReqPtr writeReq = new MemReq(); + writeReq->xc = xc; + writeReq->asid = 0; + writeReq->data = new uint8_t[64]; + +#if 0 + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } +#endif + + writeReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = translateDataWriteReq(writeReq); + + // do functional access + if (fault == NoFault) + fault = xc->write(writeReq, data); + + if (fault == NoFault && dcacheInterface) { + writeReq->cmd = Write; + memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); + writeReq->completionEvent = NULL; + writeReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(writeReq); + + if (dcacheInterface->doEvents()) { + writeReq->completionEvent = new DCacheCompletionEvent(this, inst, + dcceIt); + } + } + + if (res && (fault == NoFault)) + *res = writeReq->result; + + if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); + + return fault; +} + + +#endif // __CPU_OOO_CPU_OOO_CPU_HH__ diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh new file mode 100644 index 000000000..e7ed3cfe0 --- /dev/null +++ b/src/cpu/ozone/cpu_impl.hh @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OOO_CPU_OOO_IMPL_HH__ +#define __CPU_OOO_CPU_OOO_IMPL_HH__ + +#include "arch/isa_traits.hh" + +template <class Impl> +class OoOCPU; + +template <class Impl> +class OoODynInst; + +struct OoOImpl { + typedef AlphaISA ISA; + typedef OoOCPU<OoOImpl> OoOCPU; + typedef OoOCPU FullCPU; + typedef OoODynInst<OoOImpl> DynInst; + typedef RefCountingPtr<DynInst> DynInstPtr; +}; + +#endif // __CPU_OOO_CPU_OOO_IMPL_HH__ diff --git a/src/cpu/ozone/ea_list.cc b/src/cpu/ozone/ea_list.cc new file mode 100644 index 000000000..6114a0ca1 --- /dev/null +++ b/src/cpu/ozone/ea_list.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ooo_cpu/ea_list.hh" + +void +EAList::addAddr(const InstSeqNum &new_sn, const Addr &new_ea) +{ + instEA newEA(new_sn, new_ea); + + eaList.push_back(newEA); +} + +void +EAList::clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear) +{ + eaListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first != sn_to_clear) { + assert((*list_it).second == ea_to_clear); + } +} + +bool +EAList::checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const +{ + const constEAListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first < check_sn) { + if ((*list_it).second == check_ea) { + return true; + } + } + + return false; +} + +void +EAList::clear() +{ + eaList.clear(); +} + +void +EAList::commit(const InstSeqNum &commit_sn) +{ + while (!eaList.empty() && eaList.front().first <= commit_sn) { + eaList.pop_front(); + } +} diff --git a/src/cpu/ozone/ea_list.hh b/src/cpu/ozone/ea_list.hh new file mode 100644 index 000000000..c0eee4bb8 --- /dev/null +++ b/src/cpu/ozone/ea_list.hh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_EA_LIST_HH__ +#define __CPU_EA_LIST_HH__ + +#include <list> +#include <utility> + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +/** + * Simple class to hold onto a list of pairs, each pair having a memory + * instruction's sequence number and effective addr. This list can be used + * for memory disambiguation. However, if I ever want to forward results, I + * may have to use a list that holds DynInstPtrs. Hence this may change in + * the future. + */ +class EAList { + private: + typedef std::pair<InstSeqNum, Addr> instEA; + typedef std::list<instEA>::iterator eaListIt; + typedef std::list<instEA>::const_iterator constEAListIt; + + std::list<instEA> eaList; + + public: + EAList() { } + ~EAList() { } + + void addAddr(const InstSeqNum &new_sn, const Addr &new_ea); + + void clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear); + + /** Checks if any instructions older than check_sn have a conflicting + * address with check_ea. Note that this function does not handle the + * sequence number rolling over. + */ + bool checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const; + + void clear(); + + void commit(const InstSeqNum &commit_sn); +}; + +#endif // __CPU_EA_LIST_HH__ diff --git a/src/cpu/pc_event.cc b/src/cpu/pc_event.cc new file mode 100644 index 000000000..050bf1a88 --- /dev/null +++ b/src/cpu/pc_event.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <algorithm> +#include <map> +#include <string> +#include <utility> + +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/pc_event.hh" +#include "sim/debug.hh" +#include "sim/root.hh" +#include "sim/system.hh" + +using namespace std; + +PCEventQueue::PCEventQueue() +{} + +PCEventQueue::~PCEventQueue() +{} + +bool +PCEventQueue::remove(PCEvent *event) +{ + int removed = 0; + range_t range = equal_range(event); + for (iterator i = range.first; i != range.second; ++i) { + if (*i == event) { + DPRINTF(PCEvent, "PC based event removed at %#x: %s\n", + event->pc(), event->descr()); + pc_map.erase(i); + ++removed; + } + } + + return removed > 0; +} + +bool +PCEventQueue::schedule(PCEvent *event) +{ + pc_map.push_back(event); + sort(pc_map.begin(), pc_map.end(), MapCompare()); + + DPRINTF(PCEvent, "PC based event scheduled for %#x: %s\n", + event->pc(), event->descr()); + + return true; +} + +bool +PCEventQueue::doService(ExecContext *xc) +{ + Addr pc = xc->readPC() & ~0x3; + int serviced = 0; + range_t range = equal_range(pc); + for (iterator i = range.first; i != range.second; ++i) { + // Make sure that the pc wasn't changed as the side effect of + // another event. This for example, prevents two invocations + // of the SkipFuncEvent. Maybe we should have separate PC + // event queues for each processor? + if (pc != (xc->readPC() & ~0x3)) + continue; + + DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n", + (*i)->pc(), (*i)->descr()); + + (*i)->process(xc); + ++serviced; + } + + return serviced > 0; +} + +void +PCEventQueue::dump() const +{ + const_iterator i = pc_map.begin(); + const_iterator e = pc_map.end(); + + for (; i != e; ++i) + cprintf("%d: event at %#x: %s\n", curTick, (*i)->pc(), + (*i)->descr()); +} + +PCEventQueue::range_t +PCEventQueue::equal_range(Addr pc) +{ + return std::equal_range(pc_map.begin(), pc_map.end(), pc, MapCompare()); +} + +BreakPCEvent::BreakPCEvent(PCEventQueue *q, const std::string &desc, Addr addr, + bool del) + : PCEvent(q, desc, addr), remove(del) +{ +} + +void +BreakPCEvent::process(ExecContext *xc) +{ + StringWrap name(xc->getCpuPtr()->name() + ".break_event"); + DPRINTFN("break event %s triggered\n", descr()); + debug_break(); + if (remove) + delete this; +} + +#if FULL_SYSTEM +extern "C" +void +sched_break_pc_sys(System *sys, Addr addr) +{ + new BreakPCEvent(&sys->pcEventQueue, "debug break", addr, true); +} + +extern "C" +void +sched_break_pc(Addr addr) +{ + for (vector<System *>::iterator sysi = System::systemList.begin(); + sysi != System::systemList.end(); ++sysi) { + sched_break_pc_sys(*sysi, addr); + } + +} +#endif diff --git a/src/cpu/pc_event.hh b/src/cpu/pc_event.hh new file mode 100644 index 000000000..32b7f3ef5 --- /dev/null +++ b/src/cpu/pc_event.hh @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PC_EVENT_HH__ +#define __PC_EVENT_HH__ + +#include <vector> + +#include "base/misc.hh" + +class ExecContext; +class PCEventQueue; + +class PCEvent +{ + protected: + std::string description; + PCEventQueue *queue; + Addr evpc; + + public: + PCEvent(PCEventQueue *q, const std::string &desc, Addr pc); + + virtual ~PCEvent() { if (queue) remove(); } + + // for DPRINTF + virtual const std::string name() const { return description; } + + std::string descr() const { return description; } + Addr pc() const { return evpc; } + + bool remove(); + virtual void process(ExecContext *xc) = 0; +}; + +class PCEventQueue +{ + protected: + typedef PCEvent * record_t; + class MapCompare { + public: + bool operator()(const record_t &l, const record_t &r) const { + return l->pc() < r->pc(); + } + bool operator()(const record_t &l, Addr pc) const { + return l->pc() < pc; + } + bool operator()(Addr pc, const record_t &r) const { + return pc < r->pc(); + } + }; + typedef std::vector<record_t> map_t; + + public: + typedef map_t::iterator iterator; + typedef map_t::const_iterator const_iterator; + + protected: + typedef std::pair<iterator, iterator> range_t; + typedef std::pair<const_iterator, const_iterator> const_range_t; + + protected: + map_t pc_map; + + bool doService(ExecContext *xc); + + public: + PCEventQueue(); + ~PCEventQueue(); + + bool remove(PCEvent *event); + bool schedule(PCEvent *event); + bool service(ExecContext *xc) + { + if (pc_map.empty()) + return false; + + return doService(xc); + } + + range_t equal_range(Addr pc); + range_t equal_range(PCEvent *event) { return equal_range(event->pc()); } + + void dump() const; +}; + + +inline +PCEvent::PCEvent(PCEventQueue *q, const std::string &desc, Addr pc) + : description(desc), queue(q), evpc(pc) +{ + queue->schedule(this); +} + +inline bool +PCEvent::remove() +{ + if (!queue) + panic("cannot remove an uninitialized event;"); + + return queue->remove(this); +} + +class BreakPCEvent : public PCEvent +{ + protected: + bool remove; + + public: + BreakPCEvent(PCEventQueue *q, const std::string &desc, Addr addr, + bool del = false); + virtual void process(ExecContext *xc); +}; + +#endif // __PC_EVENT_HH__ diff --git a/src/cpu/profile.cc b/src/cpu/profile.cc new file mode 100644 index 000000000..fe3458b61 --- /dev/null +++ b/src/cpu/profile.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string> + +#include "base/bitfield.hh" +#include "base/callback.hh" +#include "base/statistics.hh" +#include "base/trace.hh" +#include "base/loader/symtab.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/profile.hh" + +using namespace std; + +ProfileNode::ProfileNode() + : count(0) +{ } + +void +ProfileNode::dump(const string &symbol, uint64_t id, const SymbolTable *symtab, + ostream &os) const +{ + ccprintf(os, "%#x %s %d ", id, symbol, count); + ChildList::const_iterator i, end = children.end(); + for (i = children.begin(); i != end; ++i) { + const ProfileNode *node = i->second; + ccprintf(os, "%#x ", (intptr_t)node); + } + + ccprintf(os, "\n"); + + for (i = children.begin(); i != end; ++i) { + Addr addr = i->first; + string symbol; + if (addr == 1) + symbol = "user"; + else if (addr == 2) + symbol = "console"; + else if (addr == 3) + symbol = "unknown"; + else if (!symtab->findSymbol(addr, symbol)) + panic("could not find symbol for address %#x\n", addr); + + const ProfileNode *node = i->second; + node->dump(symbol, (intptr_t)node, symtab, os); + } +} + +void +ProfileNode::clear() +{ + count = 0; + ChildList::iterator i, end = children.end(); + for (i = children.begin(); i != end; ++i) + i->second->clear(); +} + +FunctionProfile::FunctionProfile(const SymbolTable *_symtab) + : reset(0), symtab(_symtab) +{ + reset = new MakeCallback<FunctionProfile, &FunctionProfile::clear>(this); + Stats::registerResetCallback(reset); +} + +FunctionProfile::~FunctionProfile() +{ + if (reset) + delete reset; +} + +ProfileNode * +FunctionProfile::consume(const vector<Addr> &stack) +{ + ProfileNode *current = ⊤ + for (int i = 0, size = stack.size(); i < size; ++i) { + ProfileNode *&ptr = current->children[stack[size - i - 1]]; + if (ptr == NULL) + ptr = new ProfileNode; + + current = ptr; + } + + return current; +} + +void +FunctionProfile::clear() +{ + top.clear(); + pc_count.clear(); +} + +void +FunctionProfile::dump(ExecContext *xc, ostream &os) const +{ + ccprintf(os, ">>>PC data\n"); + map<Addr, Counter>::const_iterator i, end = pc_count.end(); + for (i = pc_count.begin(); i != end; ++i) { + Addr pc = i->first; + Counter count = i->second; + + std::string symbol; + if (pc == 1) + ccprintf(os, "user %d\n", count); + else if (symtab->findSymbol(pc, symbol) && !symbol.empty()) + ccprintf(os, "%s %d\n", symbol, count); + else + ccprintf(os, "%#x %d\n", pc, count); + } + + ccprintf(os, ">>>function data\n"); + top.dump("top", 0, symtab, os); +} + +void +FunctionProfile::sample(ProfileNode *node, Addr pc) +{ + node->count++; + + Addr symaddr; + if (symtab->findNearestAddr(pc, symaddr)) { + pc_count[symaddr]++; + } else { + // record PC even if we don't have a symbol to avoid + // silently biasing the histogram + pc_count[pc]++; + } +} diff --git a/src/cpu/profile.hh b/src/cpu/profile.hh new file mode 100644 index 000000000..d55c9eec9 --- /dev/null +++ b/src/cpu/profile.hh @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_PROFILE_HH__ +#define __CPU_PROFILE_HH__ + +#include <map> + +#include "cpu/static_inst.hh" +#include "sim/host.hh" +#include "arch/stacktrace.hh" + +class ExecContext; + +class ProfileNode +{ + private: + friend class FunctionProfile; + + typedef std::map<Addr, ProfileNode *> ChildList; + ChildList children; + + public: + Counter count; + + public: + ProfileNode(); + + void dump(const std::string &symbol, uint64_t id, + const SymbolTable *symtab, std::ostream &os) const; + void clear(); +}; + +class Callback; +class FunctionProfile +{ + private: + Callback *reset; + const SymbolTable *symtab; + ProfileNode top; + std::map<Addr, Counter> pc_count; + StackTrace trace; + + public: + FunctionProfile(const SymbolTable *symtab); + ~FunctionProfile(); + + ProfileNode *consume(ExecContext *xc, StaticInstPtr inst); + ProfileNode *consume(const std::vector<Addr> &stack); + void clear(); + void dump(ExecContext *xc, std::ostream &out) const; + void sample(ProfileNode *node, Addr pc); +}; + +inline ProfileNode * +FunctionProfile::consume(ExecContext *xc, StaticInstPtr inst) +{ + if (!trace.trace(xc, inst)) + return NULL; + trace.dprintf(); + return consume(trace.getstack()); +} + +#endif // __CPU_PROFILE_HH__ diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc new file mode 100644 index 000000000..e9422b9c0 --- /dev/null +++ b/src/cpu/simple/atomic.cc @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/utility.hh" +#include "cpu/exetrace.hh" +#include "cpu/simple/atomic.hh" +#include "mem/packet_impl.hh" +#include "sim/builder.hh" + +using namespace std; +using namespace TheISA; + +AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + + +void +AtomicSimpleCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +AtomicSimpleCPU::TickEvent::description() +{ + return "AtomicSimpleCPU tick event"; +} + + +void +AtomicSimpleCPU::init() +{ + //Create Memory Ports (conect them up) + Port *mem_dport = mem->getPort(""); + dcachePort.setPeer(mem_dport); + mem_dport->setPeer(&dcachePort); + + Port *mem_iport = mem->getPort(""); + icachePort.setPeer(mem_iport); + mem_iport->setPeer(&icachePort); + + BaseCPU::init(); +#if FULL_SYSTEM + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + + // initialize CPU, including PC + TheISA::initCPU(xc, xc->readCpuId()); + } +#endif +} + +bool +AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt) +{ + panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + return true; +} + +Tick +AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt) +{ + panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + return curTick; +} + +void +AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt) +{ + panic("AtomicSimpleCPU doesn't expect recvFunctional callback!"); +} + +void +AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); +} + +Packet * +AtomicSimpleCPU::CpuPort::recvRetry() +{ + panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); + return NULL; +} + + +AtomicSimpleCPU::AtomicSimpleCPU(Params *p) + : BaseSimpleCPU(p), tickEvent(this), + width(p->width), simulate_stalls(p->simulate_stalls), + icachePort(this), dcachePort(this) +{ + _status = Idle; + + ifetch_req = new Request(true); + ifetch_req->setAsid(0); + // @todo fix me and get the real cpu iD!!! + ifetch_req->setCpuNum(0); + ifetch_req->setSize(sizeof(MachInst)); + ifetch_pkt = new Packet; + ifetch_pkt->cmd = Read; + ifetch_pkt->dataStatic(&inst); + ifetch_pkt->req = ifetch_req; + ifetch_pkt->size = sizeof(MachInst); + ifetch_pkt->dest = Packet::Broadcast; + + data_read_req = new Request(true); + // @todo fix me and get the real cpu iD!!! + data_read_req->setCpuNum(0); + data_read_req->setAsid(0); + data_read_pkt = new Packet; + data_read_pkt->cmd = Read; + data_read_pkt->dataStatic(&dataReg); + data_read_pkt->req = data_read_req; + data_read_pkt->dest = Packet::Broadcast; + + data_write_req = new Request(true); + // @todo fix me and get the real cpu iD!!! + data_write_req->setCpuNum(0); + data_write_req->setAsid(0); + data_write_pkt = new Packet; + data_write_pkt->cmd = Write; + data_write_pkt->req = data_write_req; + data_write_pkt->dest = Packet::Broadcast; +} + + +AtomicSimpleCPU::~AtomicSimpleCPU() +{ +} + +void +AtomicSimpleCPU::serialize(ostream &os) +{ + BaseSimpleCPU::serialize(os); + SERIALIZE_ENUM(_status); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); +} + +void +AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) +{ + BaseSimpleCPU::unserialize(cp, section); + UNSERIALIZE_ENUM(_status); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); +} + +void +AtomicSimpleCPU::switchOut(Sampler *s) +{ + sampler = s; + if (status() == Running) { + _status = SwitchedOut; + + tickEvent.squash(); + } + sampler->signalSwitched(); +} + + +void +AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // if any of this CPU's ExecContexts are active, mark the CPU as + // running and schedule its tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + break; + } + } +} + + +void +AtomicSimpleCPU::activateContext(int thread_num, int delay) +{ + assert(thread_num == 0); + assert(cpuXC); + + assert(_status == Idle); + assert(!tickEvent.scheduled()); + + notIdleFraction++; + tickEvent.schedule(curTick + cycles(delay)); + _status = Running; +} + + +void +AtomicSimpleCPU::suspendContext(int thread_num) +{ + assert(thread_num == 0); + assert(cpuXC); + + assert(_status == Running); + + // tick event may not be scheduled if this gets called from inside + // an instruction's execution, e.g. "quiesce" + if (tickEvent.scheduled()) + tickEvent.deschedule(); + + notIdleFraction--; + _status = Idle; +} + + +template <class T> +Fault +AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + data_read_req->setVaddr(addr); + data_read_req->setSize(sizeof(T)); + data_read_req->setFlags(flags); + data_read_req->setTime(curTick); + + if (traceData) { + traceData->setAddr(addr); + } + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(data_read_req); + + // Now do the access. + if (fault == NoFault) { + data_read_pkt->reset(); + data_read_pkt->addr = data_read_req->getPaddr(); + data_read_pkt->size = sizeof(T); + + dcache_complete = dcachePort.sendAtomic(data_read_pkt); + dcache_access = true; + + assert(data_read_pkt->result == Success); + data = data_read_pkt->get<T>(); + + } + + // This will need a new way to tell if it has a dcache attached. + if (data_read_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Read"); + + return fault; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) +{ + return read(addr, *(uint64_t*)&data, flags); +} + +template<> +Fault +AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) +{ + return read(addr, *(uint32_t*)&data, flags); +} + + +template<> +Fault +AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) +{ + return read(addr, (uint32_t&)data, flags); +} + + +template <class T> +Fault +AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + data_write_req->setVaddr(addr); + data_write_req->setTime(curTick); + data_write_req->setSize(sizeof(T)); + data_write_req->setFlags(flags); + + if (traceData) { + traceData->setAddr(addr); + } + + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(data_write_req); + + // Now do the access. + if (fault == NoFault) { + data_write_pkt->reset(); + data = htog(data); + data_write_pkt->dataStatic(&data); + data_write_pkt->addr = data_write_req->getPaddr(); + data_write_pkt->size = sizeof(T); + + dcache_complete = dcachePort.sendAtomic(data_write_pkt); + dcache_access = true; + + assert(data_write_pkt->result == Success); + + if (res && data_write_req->getFlags() & LOCKED) { + *res = data_write_req->getScResult(); + } + } + + // This will need a new way to tell if it's hooked up to a cache or not. + if (data_write_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Write"); + + // If the write needs to have a fault on the access, consider calling + // changeStatus() and changing it to "bad addr write" or something. + return fault; +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template +Fault +AtomicSimpleCPU::write(uint64_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(uint32_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(uint16_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(uint8_t data, Addr addr, + unsigned flags, uint64_t *res); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint64_t*)&data, addr, flags, res); +} + +template<> +Fault +AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint32_t*)&data, addr, flags, res); +} + + +template<> +Fault +AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) +{ + return write((uint32_t)data, addr, flags, res); +} + + +void +AtomicSimpleCPU::tick() +{ + Tick latency = cycles(1); // instruction takes one cycle by default + + for (int i = 0; i < width; ++i) { + numCycles++; + + checkForInterrupts(); + + ifetch_req->resetMin(); + ifetch_pkt->reset(); + Fault fault = setupFetchPacket(ifetch_pkt); + + if (fault == NoFault) { + Tick icache_complete = icachePort.sendAtomic(ifetch_pkt); + // ifetch_req is initialized to read the instruction directly + // into the CPU object's inst field. + + dcache_access = false; // assume no dcache access + preExecute(); + fault = curStaticInst->execute(this, traceData); + postExecute(); + + if (traceData) { + traceData->finalize(); + } + + if (simulate_stalls) { + // This calculation assumes that the icache and dcache + // access latencies are always a multiple of the CPU's + // cycle time. If not, the next tick event may get + // scheduled at a non-integer multiple of the CPU + // cycle time. + Tick icache_stall = icache_complete - curTick - cycles(1); + Tick dcache_stall = + dcache_access ? dcache_complete - curTick - cycles(1) : 0; + latency += icache_stall + dcache_stall; + } + + } + + advancePC(fault); + } + + if (_status != Idle) + tickEvent.schedule(curTick + latency); +} + + +//////////////////////////////////////////////////////////////////////// +// +// AtomicSimpleCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + + Param<Counter> max_insts_any_thread; + Param<Counter> max_insts_all_threads; + Param<Counter> max_loads_any_thread; + Param<Counter> max_loads_all_threads; + SimObjectParam<MemObject *> mem; + +#if FULL_SYSTEM + SimObjectParam<AlphaITB *> itb; + SimObjectParam<AlphaDTB *> dtb; + SimObjectParam<System *> system; + Param<int> cpu_id; + Param<Tick> profile; +#else + SimObjectParam<Process *> workload; +#endif // FULL_SYSTEM + + Param<int> clock; + + Param<bool> defer_registration; + Param<int> width; + Param<bool> function_trace; + Param<Tick> function_trace_start; + Param<bool> simulate_stalls; + +END_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + INIT_PARAM(mem, "memory"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(width, "cpu width"), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace"), + INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") + +END_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + + +CREATE_SIM_OBJECT(AtomicSimpleCPU) +{ + AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + params->deferRegistration = defer_registration; + params->clock = clock; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->width = width; + params->simulate_stalls = simulate_stalls; + params->mem = mem; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); + return cpu; +} + +REGISTER_SIM_OBJECT("AtomicSimpleCPU", AtomicSimpleCPU) + diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh new file mode 100644 index 000000000..d0ba085f0 --- /dev/null +++ b/src/cpu/simple/atomic.hh @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_ATOMIC_HH__ +#define __CPU_SIMPLE_ATOMIC_HH__ + +#include "cpu/simple/base.hh" + +class AtomicSimpleCPU : public BaseSimpleCPU +{ + public: + + struct Params : public BaseSimpleCPU::Params { + int width; + bool simulate_stalls; + }; + + AtomicSimpleCPU(Params *params); + virtual ~AtomicSimpleCPU(); + + virtual void init(); + + public: + // + enum Status { + Running, + Idle, + SwitchedOut + }; + + protected: + Status _status; + + Status status() const { return _status; } + + private: + + struct TickEvent : public Event + { + AtomicSimpleCPU *cpu; + + TickEvent(AtomicSimpleCPU *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + const int width; + const bool simulate_stalls; + + // main simulation loop (one cycle) + void tick(); + + class CpuPort : public Port + { + + AtomicSimpleCPU *cpu; + + public: + + CpuPort(AtomicSimpleCPU *_cpu) + : cpu(_cpu) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual Packet *recvRetry(); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + }; + + CpuPort icachePort; + CpuPort dcachePort; + + Request *ifetch_req; + Packet *ifetch_pkt; + Request *data_read_req; + Packet *data_read_pkt; + Request *data_write_req; + Packet *data_write_pkt; + + bool dcache_access; + Tick dcache_complete; + + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + void switchOut(Sampler *s); + void takeOverFrom(BaseCPU *oldCPU); + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); +}; + +#endif // __CPU_SIMPLE_ATOMIC_HH__ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc new file mode 100644 index 000000000..30c002ed5 --- /dev/null +++ b/src/cpu/simple/base.cc @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/utility.hh" +#include "base/cprintf.hh" +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "base/pollevent.hh" +#include "base/range.hh" +#include "base/stats/events.hh" +#include "base/trace.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/profile.hh" +#include "cpu/sampler/sampler.hh" +#include "cpu/simple/base.hh" +#include "cpu/smt.hh" +#include "cpu/static_inst.hh" +#include "kern/kernel_stats.hh" +#include "mem/packet_impl.hh" +#include "sim/byteswap.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "sim/system.hh" +#include "arch/tlb.hh" +#include "arch/stacktrace.hh" +#include "arch/vtophys.hh" +#else // !FULL_SYSTEM +#include "mem/mem_object.hh" +#endif // FULL_SYSTEM + +using namespace std; +using namespace TheISA; + +BaseSimpleCPU::BaseSimpleCPU(Params *p) + : BaseCPU(p), mem(p->mem), cpuXC(NULL) +{ +#if FULL_SYSTEM + cpuXC = new CPUExecContext(this, 0, p->system, p->itb, p->dtb); +#else + cpuXC = new CPUExecContext(this, /* thread_num */ 0, p->process, + /* asid */ 0, mem); +#endif // !FULL_SYSTEM + + xcProxy = cpuXC->getProxy(); + + numInst = 0; + startNumInst = 0; + numLoad = 0; + startNumLoad = 0; + lastIcacheStall = 0; + lastDcacheStall = 0; + + execContexts.push_back(xcProxy); +} + +BaseSimpleCPU::~BaseSimpleCPU() +{ +} + +void +BaseSimpleCPU::deallocateContext(int thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + + +void +BaseSimpleCPU::haltContext(int thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + + +void +BaseSimpleCPU::regStats() +{ + using namespace Stats; + + BaseCPU::regStats(); + + numInsts + .name(name() + ".num_insts") + .desc("Number of instructions executed") + ; + + numMemRefs + .name(name() + ".num_refs") + .desc("Number of memory references") + ; + + notIdleFraction + .name(name() + ".not_idle_fraction") + .desc("Percentage of non-idle cycles") + ; + + idleFraction + .name(name() + ".idle_fraction") + .desc("Percentage of idle cycles") + ; + + icacheStallCycles + .name(name() + ".icache_stall_cycles") + .desc("ICache total stall cycles") + .prereq(icacheStallCycles) + ; + + dcacheStallCycles + .name(name() + ".dcache_stall_cycles") + .desc("DCache total stall cycles") + .prereq(dcacheStallCycles) + ; + + icacheRetryCycles + .name(name() + ".icache_retry_cycles") + .desc("ICache total retry cycles") + .prereq(icacheRetryCycles) + ; + + dcacheRetryCycles + .name(name() + ".dcache_retry_cycles") + .desc("DCache total retry cycles") + .prereq(dcacheRetryCycles) + ; + + idleFraction = constant(1.0) - notIdleFraction; +} + +void +BaseSimpleCPU::resetStats() +{ + startNumInst = numInst; + // notIdleFraction = (_status != Idle); +} + +void +BaseSimpleCPU::serialize(ostream &os) +{ + BaseCPU::serialize(os); + SERIALIZE_SCALAR(inst); + nameOut(os, csprintf("%s.xc", name())); + cpuXC->serialize(os); +} + +void +BaseSimpleCPU::unserialize(Checkpoint *cp, const string §ion) +{ + BaseCPU::unserialize(cp, section); + UNSERIALIZE_SCALAR(inst); + cpuXC->unserialize(cp, csprintf("%s.xc", section)); +} + +void +change_thread_state(int thread_number, int activate, int priority) +{ +} + +Fault +BaseSimpleCPU::copySrcTranslate(Addr src) +{ +#if 0 + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + int offset = src & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (src & PageMask) != ((src + blk_size) & PageMask) && + (src >> 40) != 0xfffffc) { + warn("Copied block source spans pages %x.", src); + no_warn = false; + } + + memReq->reset(src & ~(blk_size - 1), blk_size); + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(req); + + if (fault == NoFault) { + cpuXC->copySrcAddr = src; + cpuXC->copySrcPhysAddr = memReq->paddr + offset; + } else { + assert(!fault->isAlignmentFault()); + + cpuXC->copySrcAddr = 0; + cpuXC->copySrcPhysAddr = 0; + } + return fault; +#else + return NoFault; +#endif +} + +Fault +BaseSimpleCPU::copy(Addr dest) +{ +#if 0 + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + uint8_t data[blk_size]; + //assert(cpuXC->copySrcAddr); + int offset = dest & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (dest & PageMask) != ((dest + blk_size) & PageMask) && + (dest >> 40) != 0xfffffc) { + no_warn = false; + warn("Copied block destination spans pages %x. ", dest); + } + + memReq->reset(dest & ~(blk_size -1), blk_size); + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(req); + + if (fault == NoFault) { + Addr dest_addr = memReq->paddr + offset; + // Need to read straight from memory since we have more than 8 bytes. + memReq->paddr = cpuXC->copySrcPhysAddr; + cpuXC->mem->read(memReq, data); + memReq->paddr = dest_addr; + cpuXC->mem->write(memReq, data); + if (dcacheInterface) { + memReq->cmd = Copy; + memReq->completionEvent = NULL; + memReq->paddr = cpuXC->copySrcPhysAddr; + memReq->dest = dest_addr; + memReq->size = 64; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + dcacheInterface->access(memReq); + } + } + else + assert(!fault->isAlignmentFault()); + + return fault; +#else + panic("copy not implemented"); + return NoFault; +#endif +} + +#if FULL_SYSTEM +Addr +BaseSimpleCPU::dbg_vtophys(Addr addr) +{ + return vtophys(xcProxy, addr); +} +#endif // FULL_SYSTEM + +#if FULL_SYSTEM +void +BaseSimpleCPU::post_interrupt(int int_num, int index) +{ + BaseCPU::post_interrupt(int_num, index); + + if (cpuXC->status() == ExecContext::Suspended) { + DPRINTF(IPI,"Suspended Processor awoke\n"); + cpuXC->activate(); + } +} +#endif // FULL_SYSTEM + +void +BaseSimpleCPU::checkForInterrupts() +{ +#if FULL_SYSTEM + if (checkInterrupts && check_interrupts() && !cpuXC->inPalMode()) { + int ipl = 0; + int summary = 0; + checkInterrupts = false; + + if (cpuXC->readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (cpuXC->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = cpuXC->cpu->intr_status(); + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + + if (cpuXC->readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (ipl && ipl > cpuXC->readMiscReg(IPR_IPLR)) { + cpuXC->setMiscReg(IPR_ISR, summary); + cpuXC->setMiscReg(IPR_INTID, ipl); + + Fault(new InterruptFault)->invoke(xcProxy); + + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + cpuXC->readMiscReg(IPR_IPLR), ipl, summary); + } + } +#endif +} + + +Fault +BaseSimpleCPU::setupFetchPacket(Packet *ifetch_pkt) +{ + // Try to fetch an instruction + + // set up memory request for instruction fetch + + DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",cpuXC->readPC(), + cpuXC->readNextPC(),cpuXC->readNextNPC()); + + Request *ifetch_req = ifetch_pkt->req; + ifetch_req->setVaddr(cpuXC->readPC() & ~3); + ifetch_req->setTime(curTick); +#if FULL_SYSTEM + ifetch_req->setFlags((cpuXC->readPC() & 1) ? PHYSICAL : 0); +#else + ifetch_req->setFlags(0); +#endif + + Fault fault = cpuXC->translateInstReq(ifetch_req); + + if (fault == NoFault) { + ifetch_pkt->addr = ifetch_req->getPaddr(); + } + + return fault; +} + + +void +BaseSimpleCPU::preExecute() +{ + // maintain $r0 semantics + cpuXC->setIntReg(ZeroReg, 0); +#if THE_ISA == ALPHA_ISA + cpuXC->setFloatReg(ZeroReg, 0.0); +#endif // ALPHA_ISA + + // keep an instruction count + numInst++; + numInsts++; + + cpuXC->func_exe_inst++; + + // check for instruction-count-based events + comInstEventQueue[0]->serviceEvents(numInst); + + // decode the instruction + inst = gtoh(inst); + curStaticInst = StaticInst::decode(makeExtMI(inst, cpuXC->readPC())); + + traceData = Trace::getInstRecord(curTick, xcProxy, this, curStaticInst, + cpuXC->readPC()); + + DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n", + curStaticInst->getName(), curStaticInst->getOpcode(), + curStaticInst->machInst); + +#if FULL_SYSTEM + cpuXC->setInst(inst); +#endif // FULL_SYSTEM +} + +void +BaseSimpleCPU::postExecute() +{ +#if FULL_SYSTEM + if (system->kernelBinning->fnbin) { + assert(kernelStats); + system->kernelBinning->execute(xcProxy, inst); + } + + if (cpuXC->profile) { + bool usermode = + (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0; + cpuXC->profilePC = usermode ? 1 : cpuXC->readPC(); + ProfileNode *node = cpuXC->profile->consume(xcProxy, inst); + if (node) + cpuXC->profileNode = node; + } +#endif + + if (curStaticInst->isMemRef()) { + numMemRefs++; + } + + if (curStaticInst->isLoad()) { + ++numLoad; + comLoadEventQueue[0]->serviceEvents(numLoad); + } + + traceFunctions(cpuXC->readPC()); +} + + +void +BaseSimpleCPU::advancePC(Fault fault) +{ + if (fault != NoFault) { +#if FULL_SYSTEM + fault->invoke(xcProxy); +#else // !FULL_SYSTEM + fatal("fault (%s) detected @ PC %08p", fault->name(), cpuXC->readPC()); +#endif // FULL_SYSTEM + } + else { + // go to the next instruction + cpuXC->setPC(cpuXC->readNextPC()); +#if THE_ISA == ALPHA_ISA + cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst)); +#else + cpuXC->setNextPC(cpuXC->readNextNPC()); + cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst)); +#endif + + } + +#if FULL_SYSTEM + Addr oldpc; + do { + oldpc = cpuXC->readPC(); + system->pcEventQueue.service(xcProxy); + } while (oldpc != cpuXC->readPC()); +#endif +} + diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh new file mode 100644 index 000000000..4c0e6f3c7 --- /dev/null +++ b/src/cpu/simple/base.hh @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_BASE_HH__ +#define __CPU_SIMPLE_BASE_HH__ + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/pc_event.hh" +#include "cpu/sampler/sampler.hh" +#include "cpu/static_inst.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "mem/request.hh" +#include "sim/eventq.hh" + +// forward declarations +#if FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class MemObject; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM + +class ExecContext; +class Checkpoint; + +namespace Trace { + class InstRecord; +} + + +class BaseSimpleCPU : public BaseCPU +{ + protected: + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + + MemObject *mem; + + protected: + Trace::InstRecord *traceData; + + public: + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + + public: + struct Params : public BaseCPU::Params + { + MemObject *mem; +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#else + Process *process; +#endif + }; + BaseSimpleCPU(Params *params); + virtual ~BaseSimpleCPU(); + + public: + // execution context + CPUExecContext *cpuXC; + + ExecContext *xcProxy; + +#if FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; +#endif + + // current instruction + MachInst inst; + + // Static data storage + TheISA::IntReg dataReg; + + // Pointer to the sampler that is telling us to switchover. + // Used to signal the completion of the pipe drain and schedule + // the next switchover + Sampler *sampler; + + StaticInstPtr curStaticInst; + + void checkForInterrupts(); + Fault setupFetchPacket(Packet *ifetch_pkt); + void preExecute(); + void postExecute(); + void advancePC(Fault fault); + + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated memory references + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; + + // number of cycles stalled for I-cache responses + Stats::Scalar<> icacheStallCycles; + Counter lastIcacheStall; + + // number of cycles stalled for I-cache retries + Stats::Scalar<> icacheRetryCycles; + Counter lastIcacheRetry; + + // number of cycles stalled for D-cache responses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + // number of cycles stalled for D-cache retries + Stats::Scalar<> dcacheRetryCycles; + Counter lastDcacheRetry; + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + // These functions are only used in CPU models that split + // effective address computation from the actual memory access. + void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); } + Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n"); } + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return cpuXC->readIntReg(si->srcRegIdx(idx)); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatRegBits(reg_idx); + } + + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + cpuXC->setIntReg(si->destRegIdx(idx), val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatReg(reg_idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatReg(reg_idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() { return cpuXC->readPC(); } + uint64_t readNextPC() { return cpuXC->readNextPC(); } + uint64_t readNextNPC() { return cpuXC->readNextNPC(); } + + void setPC(uint64_t val) { cpuXC->setPC(val); } + void setNextPC(uint64_t val) { cpuXC->setNextPC(val); } + void setNextNPC(uint64_t val) { cpuXC->setNextNPC(val); } + + MiscReg readMiscReg(int misc_reg) + { + return cpuXC->readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return cpuXC->readMiscRegWithEffect(misc_reg, fault); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return cpuXC->setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return cpuXC->setMiscRegWithEffect(misc_reg, val); + } + +#if FULL_SYSTEM + Fault hwrei() { return cpuXC->hwrei(); } + int readIntrFlag() { return cpuXC->readIntrFlag(); } + void setIntrFlag(int val) { cpuXC->setIntrFlag(val); } + bool inPalMode() { return cpuXC->inPalMode(); } + void ev5_trap(Fault fault) { fault->invoke(xcProxy); } + bool simPalCheck(int palFunc) { return cpuXC->simPalCheck(palFunc); } +#else + void syscall(int64_t callnum) { cpuXC->syscall(callnum); } +#endif + + bool misspeculating() { return cpuXC->misspeculating(); } + ExecContext *xcBase() { return xcProxy; } +}; + +#endif // __CPU_SIMPLE_BASE_HH__ diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc new file mode 100644 index 000000000..70b88c4b1 --- /dev/null +++ b/src/cpu/simple/timing.cc @@ -0,0 +1,570 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/utility.hh" +#include "cpu/exetrace.hh" +#include "cpu/simple/timing.hh" +#include "mem/packet_impl.hh" +#include "sim/builder.hh" + +using namespace std; +using namespace TheISA; + + +void +TimingSimpleCPU::init() +{ + //Create Memory Ports (conect them up) + Port *mem_dport = mem->getPort(""); + dcachePort.setPeer(mem_dport); + mem_dport->setPeer(&dcachePort); + + Port *mem_iport = mem->getPort(""); + icachePort.setPeer(mem_iport); + mem_iport->setPeer(&icachePort); + + BaseCPU::init(); +#if FULL_SYSTEM + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + + // initialize CPU, including PC + TheISA::initCPU(xc, xc->readCpuId()); + } +#endif +} + +Tick +TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt) +{ + panic("TimingSimpleCPU doesn't expect recvAtomic callback!"); + return curTick; +} + +void +TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt) +{ + panic("TimingSimpleCPU doesn't expect recvFunctional callback!"); +} + +void +TimingSimpleCPU::CpuPort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("TimingSimpleCPU doesn't expect recvStatusChange callback!"); +} + +TimingSimpleCPU::TimingSimpleCPU(Params *p) + : BaseSimpleCPU(p), icachePort(this), dcachePort(this) +{ + _status = Idle; + ifetch_pkt = dcache_pkt = NULL; +} + + +TimingSimpleCPU::~TimingSimpleCPU() +{ +} + +void +TimingSimpleCPU::serialize(ostream &os) +{ + BaseSimpleCPU::serialize(os); + SERIALIZE_ENUM(_status); +} + +void +TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) +{ + BaseSimpleCPU::unserialize(cp, section); + UNSERIALIZE_ENUM(_status); +} + +void +TimingSimpleCPU::switchOut(Sampler *s) +{ + sampler = s; + if (status() == Running) { + _status = SwitchedOut; + } + sampler->signalSwitched(); +} + + +void +TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + // if any of this CPU's ExecContexts are active, mark the CPU as + // running and schedule its tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + break; + } + } +} + + +void +TimingSimpleCPU::activateContext(int thread_num, int delay) +{ + assert(thread_num == 0); + assert(cpuXC); + + assert(_status == Idle); + + notIdleFraction++; + _status = Running; + // kick things off by initiating the fetch of the next instruction + Event *e = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true); + e->schedule(curTick + cycles(delay)); +} + + +void +TimingSimpleCPU::suspendContext(int thread_num) +{ + assert(thread_num == 0); + assert(cpuXC); + + panic("TimingSimpleCPU::suspendContext not implemented"); + + assert(_status == Running); + + notIdleFraction--; + _status = Idle; +} + + +template <class T> +Fault +TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + Request *data_read_req = new Request(true); + + data_read_req->setVaddr(addr); + data_read_req->setSize(sizeof(T)); + data_read_req->setFlags(flags); + data_read_req->setTime(curTick); + + if (traceData) { + traceData->setAddr(data_read_req->getVaddr()); + } + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(data_read_req); + + // Now do the access. + if (fault == NoFault) { + Packet *data_read_pkt = new Packet; + data_read_pkt->cmd = Read; + data_read_pkt->req = data_read_req; + data_read_pkt->dataDynamic<T>(new T); + data_read_pkt->addr = data_read_req->getPaddr(); + data_read_pkt->size = sizeof(T); + data_read_pkt->dest = Packet::Broadcast; + + if (!dcachePort.sendTiming(data_read_pkt)) { + _status = DcacheRetry; + dcache_pkt = data_read_pkt; + } else { + _status = DcacheWaitResponse; + dcache_pkt = NULL; + } + } + + // This will need a new way to tell if it has a dcache attached. + if (data_read_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Read"); + + return fault; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +TimingSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); + +template +Fault +TimingSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); + +template +Fault +TimingSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); + +template +Fault +TimingSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +TimingSimpleCPU::read(Addr addr, double &data, unsigned flags) +{ + return read(addr, *(uint64_t*)&data, flags); +} + +template<> +Fault +TimingSimpleCPU::read(Addr addr, float &data, unsigned flags) +{ + return read(addr, *(uint32_t*)&data, flags); +} + + +template<> +Fault +TimingSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) +{ + return read(addr, (uint32_t&)data, flags); +} + + +template <class T> +Fault +TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + Request *data_write_req = new Request(true); + data_write_req->setVaddr(addr); + data_write_req->setTime(curTick); + data_write_req->setSize(sizeof(T)); + data_write_req->setFlags(flags); + + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(data_write_req); + // Now do the access. + if (fault == NoFault) { + Packet *data_write_pkt = new Packet; + data_write_pkt->cmd = Write; + data_write_pkt->req = data_write_req; + data_write_pkt->allocate(); + data_write_pkt->size = sizeof(T); + data_write_pkt->set(data); + data_write_pkt->addr = data_write_req->getPaddr(); + data_write_pkt->dest = Packet::Broadcast; + + if (!dcachePort.sendTiming(data_write_pkt)) { + _status = DcacheRetry; + dcache_pkt = data_write_pkt; + } else { + _status = DcacheWaitResponse; + dcache_pkt = NULL; + } + } + + // This will need a new way to tell if it's hooked up to a cache or not. + if (data_write_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Write"); + + // If the write needs to have a fault on the access, consider calling + // changeStatus() and changing it to "bad addr write" or something. + return fault; +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template +Fault +TimingSimpleCPU::write(uint64_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(uint32_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(uint16_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(uint8_t data, Addr addr, + unsigned flags, uint64_t *res); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +TimingSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint64_t*)&data, addr, flags, res); +} + +template<> +Fault +TimingSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint32_t*)&data, addr, flags, res); +} + + +template<> +Fault +TimingSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) +{ + return write((uint32_t)data, addr, flags, res); +} + + +void +TimingSimpleCPU::fetch() +{ + checkForInterrupts(); + + Request *ifetch_req = new Request(true); + ifetch_req->setSize(sizeof(MachInst)); + + ifetch_pkt = new Packet; + ifetch_pkt->cmd = Read; + ifetch_pkt->dataStatic(&inst); + ifetch_pkt->req = ifetch_req; + ifetch_pkt->size = sizeof(MachInst); + ifetch_pkt->dest = Packet::Broadcast; + + Fault fault = setupFetchPacket(ifetch_pkt); + if (fault == NoFault) { + if (!icachePort.sendTiming(ifetch_pkt)) { + // Need to wait for retry + _status = IcacheRetry; + } else { + // Need to wait for cache to respond + _status = IcacheWaitResponse; + // ownership of packet transferred to memory system + ifetch_pkt = NULL; + } + } else { + panic("TimingSimpleCPU fetch fault handling not implemented"); + } +} + + +void +TimingSimpleCPU::completeInst(Fault fault) +{ + postExecute(); + + if (traceData) { + traceData->finalize(); + } + + advancePC(fault); + + if (_status == Running) { + // kick off fetch of next instruction... callback from icache + // response will cause that instruction to be executed, + // keeping the CPU running. + fetch(); + } +} + + +void +TimingSimpleCPU::completeIfetch() +{ + // received a response from the icache: execute the received + // instruction + assert(_status == IcacheWaitResponse); + _status = Running; + preExecute(); + if (curStaticInst->isMemRef()) { + // load or store: just send to dcache + Fault fault = curStaticInst->initiateAcc(this, traceData); + assert(fault == NoFault); + assert(_status == DcacheWaitResponse); + // instruction will complete in dcache response callback + } else { + // non-memory instruction: execute completely now + Fault fault = curStaticInst->execute(this, traceData); + completeInst(fault); + } +} + + +bool +TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) +{ + cpu->completeIfetch(); + return true; +} + +Packet * +TimingSimpleCPU::IcachePort::recvRetry() +{ + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit + assert(cpu->ifetch_pkt != NULL); + assert(cpu->_status == IcacheRetry); + cpu->_status = IcacheWaitResponse; + Packet *tmp = cpu->ifetch_pkt; + cpu->ifetch_pkt = NULL; + return tmp; +} + +void +TimingSimpleCPU::completeDataAccess(Packet *pkt) +{ + // received a response from the dcache: complete the load or store + // instruction + assert(pkt->result == Success); + assert(_status == DcacheWaitResponse); + _status = Running; + + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); + + completeInst(fault); +} + + + +bool +TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) +{ + cpu->completeDataAccess(pkt); + return true; +} + +Packet * +TimingSimpleCPU::DcachePort::recvRetry() +{ + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit + assert(cpu->dcache_pkt != NULL); + assert(cpu->_status == DcacheRetry); + cpu->_status = DcacheWaitResponse; + Packet *tmp = cpu->dcache_pkt; + cpu->dcache_pkt = NULL; + return tmp; +} + + +//////////////////////////////////////////////////////////////////////// +// +// TimingSimpleCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) + + Param<Counter> max_insts_any_thread; + Param<Counter> max_insts_all_threads; + Param<Counter> max_loads_any_thread; + Param<Counter> max_loads_all_threads; + SimObjectParam<MemObject *> mem; + +#if FULL_SYSTEM + SimObjectParam<AlphaITB *> itb; + SimObjectParam<AlphaDTB *> dtb; + SimObjectParam<System *> system; + Param<int> cpu_id; + Param<Tick> profile; +#else + SimObjectParam<Process *> workload; +#endif // FULL_SYSTEM + + Param<int> clock; + + Param<bool> defer_registration; + Param<int> width; + Param<bool> function_trace; + Param<Tick> function_trace_start; + Param<bool> simulate_stalls; + +END_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + INIT_PARAM(mem, "memory"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(width, "cpu width"), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace"), + INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") + +END_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) + + +CREATE_SIM_OBJECT(TimingSimpleCPU) +{ + TimingSimpleCPU::Params *params = new TimingSimpleCPU::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + params->deferRegistration = defer_registration; + params->clock = clock; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->mem = mem; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + TimingSimpleCPU *cpu = new TimingSimpleCPU(params); + return cpu; +} + +REGISTER_SIM_OBJECT("TimingSimpleCPU", TimingSimpleCPU) + diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh new file mode 100644 index 000000000..83be025d9 --- /dev/null +++ b/src/cpu/simple/timing.hh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_TIMING_HH__ +#define __CPU_SIMPLE_TIMING_HH__ + +#include "cpu/simple/base.hh" + +class TimingSimpleCPU : public BaseSimpleCPU +{ + public: + + struct Params : public BaseSimpleCPU::Params { + }; + + TimingSimpleCPU(Params *params); + virtual ~TimingSimpleCPU(); + + virtual void init(); + + public: + // + enum Status { + Idle, + Running, + IcacheRetry, + IcacheWaitResponse, + IcacheWaitSwitch, + DcacheRetry, + DcacheWaitResponse, + DcacheWaitSwitch, + SwitchedOut + }; + + protected: + Status _status; + + Status status() const { return _status; } + + private: + + class CpuPort : public Port + { + protected: + TimingSimpleCPU *cpu; + + public: + + CpuPort(TimingSimpleCPU *_cpu) + : cpu(_cpu) + { } + + protected: + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + }; + + class IcachePort : public CpuPort + { + public: + + IcachePort(TimingSimpleCPU *_cpu) + : CpuPort(_cpu) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); + + virtual Packet *recvRetry(); + }; + + class DcachePort : public CpuPort + { + public: + + DcachePort(TimingSimpleCPU *_cpu) + : CpuPort(_cpu) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); + + virtual Packet *recvRetry(); + }; + + IcachePort icachePort; + DcachePort dcachePort; + + Packet *ifetch_pkt; + Packet *dcache_pkt; + + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + void switchOut(Sampler *s); + void takeOverFrom(BaseCPU *oldCPU); + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + void fetch(); + void completeInst(Fault fault); + void completeIfetch(); + void completeDataAccess(Packet *); +}; + +#endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/cpu/smt.hh b/src/cpu/smt.hh new file mode 100644 index 000000000..9c52abf95 --- /dev/null +++ b/src/cpu/smt.hh @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Defines SMT_MAX_THREADS. + */ + +#ifndef __SMT_HH__ +#define __SMT_HH__ + +#ifndef SMT_MAX_THREADS +/** The number of TPUs in any processor. */ +#define SMT_MAX_THREADS 4 +#endif + +/** + * The maximum number of active threads across all cpus. Used to + * initialize per-thread statistics in the cache. + * + * NB: Be careful to only use it once all the CPUs that you care about + * have been initialized + */ +extern int maxThreadsPerCPU; + +/** + * Changes the status and priority of the thread with the given number. + * @param thread_number The thread to change. + * @param activate The new active status. + * @param priority The new priority. + */ +void change_thread_state(int thread_number, int activate, int priority); + +#endif // __SMT_HH__ diff --git a/src/cpu/static_inst.cc b/src/cpu/static_inst.cc new file mode 100644 index 000000000..c307dc6fc --- /dev/null +++ b/src/cpu/static_inst.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <iostream> +#include "cpu/static_inst.hh" +#include "sim/root.hh" + +StaticInstPtr StaticInst::nullStaticInstPtr; + +// Define the decode cache hash map. +StaticInst::DecodeCache StaticInst::decodeCache; + +void +StaticInst::dumpDecodeCacheStats() +{ + using namespace std; + + cerr << "Decode hash table stats @ " << curTick << ":" << endl; + cerr << "\tnum entries = " << decodeCache.size() << endl; + cerr << "\tnum buckets = " << decodeCache.bucket_count() << endl; + vector<int> hist(100, 0); + int max_hist = 0; + for (int i = 0; i < decodeCache.bucket_count(); ++i) { + int count = decodeCache.elems_in_bucket(i); + if (count > max_hist) + max_hist = count; + hist[count]++; + } + for (int i = 0; i <= max_hist; ++i) { + cerr << "\tbuckets of size " << i << " = " << hist[i] << endl; + } +} + +bool +StaticInst::hasBranchTarget(Addr pc, ExecContext *xc, Addr &tgt) const +{ + if (isDirectCtrl()) { + tgt = branchTarget(pc); + return true; + } + + if (isIndirectCtrl()) { + tgt = branchTarget(xc); + return true; + } + + return false; +} + diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh new file mode 100644 index 000000000..33c9144fb --- /dev/null +++ b/src/cpu/static_inst.hh @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_STATIC_INST_HH__ +#define __CPU_STATIC_INST_HH__ + +#include <bitset> +#include <string> + +#include "base/hashmap.hh" +#include "base/misc.hh" +#include "base/refcnt.hh" +#include "cpu/op_class.hh" +#include "sim/host.hh" +#include "arch/isa_traits.hh" + +// forward declarations +struct AlphaSimpleImpl; +class ExecContext; +class DynInst; +class Packet; + +template <class Impl> +class AlphaDynInst; + +class FastCPU; +class AtomicSimpleCPU; +class TimingSimpleCPU; +class InorderCPU; +class SymbolTable; + +namespace Trace { + class InstRecord; +} + +/** + * Base, ISA-independent static instruction class. + * + * The main component of this class is the vector of flags and the + * associated methods for reading them. Any object that can rely + * solely on these flags can process instructions without being + * recompiled for multiple ISAs. + */ +class StaticInstBase : public RefCounted +{ + protected: + + /// Set of boolean static instruction properties. + /// + /// Notes: + /// - The IsInteger and IsFloating flags are based on the class of + /// registers accessed by the instruction. Although most + /// instructions will have exactly one of these two flags set, it + /// is possible for an instruction to have neither (e.g., direct + /// unconditional branches, memory barriers) or both (e.g., an + /// FP/int conversion). + /// - If IsMemRef is set, then exactly one of IsLoad or IsStore + /// will be set. + /// - If IsControl is set, then exactly one of IsDirectControl or + /// IsIndirect Control will be set, and exactly one of + /// IsCondControl or IsUncondControl will be set. + /// - IsSerializing, IsMemBarrier, and IsWriteBarrier are + /// implemented as flags since in the current model there's no + /// other way for instructions to inject behavior into the + /// pipeline outside of fetch. Once we go to an exec-in-exec CPU + /// model we should be able to get rid of these flags and + /// implement this behavior via the execute() methods. + /// + enum Flags { + IsNop, ///< Is a no-op (no effect at all). + + IsInteger, ///< References integer regs. + IsFloating, ///< References FP regs. + + IsMemRef, ///< References memory (load, store, or prefetch). + IsLoad, ///< Reads from memory (load or prefetch). + IsStore, ///< Writes to memory. + IsInstPrefetch, ///< Instruction-cache prefetch. + IsDataPrefetch, ///< Data-cache prefetch. + IsCopy, ///< Fast Cache block copy + + IsControl, ///< Control transfer instruction. + IsDirectControl, ///< PC relative control transfer. + IsIndirectControl, ///< Register indirect control transfer. + IsCondControl, ///< Conditional control transfer. + IsUncondControl, ///< Unconditional control transfer. + IsCall, ///< Subroutine call. + IsReturn, ///< Subroutine return. + + IsCondDelaySlot,///< Conditional Delay-Slot Instruction + + IsThreadSync, ///< Thread synchronization operation. + + IsSerializing, ///< Serializes pipeline: won't execute until all + /// older instructions have committed. + IsSerializeBefore, + IsSerializeAfter, + IsMemBarrier, ///< Is a memory barrier + IsWriteBarrier, ///< Is a write barrier + + IsNonSpeculative, ///< Should not be executed speculatively + + NumFlags + }; + + /// Flag values for this instruction. + std::bitset<NumFlags> flags; + + /// See opClass(). + OpClass _opClass; + + /// See numSrcRegs(). + int8_t _numSrcRegs; + + /// See numDestRegs(). + int8_t _numDestRegs; + + /// The following are used to track physical register usage + /// for machines with separate int & FP reg files. + //@{ + int8_t _numFPDestRegs; + int8_t _numIntDestRegs; + //@} + + /// Constructor. + /// It's important to initialize everything here to a sane + /// default, since the decoder generally only overrides + /// the fields that are meaningful for the particular + /// instruction. + StaticInstBase(OpClass __opClass) + : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), + _numFPDestRegs(0), _numIntDestRegs(0) + { + } + + public: + + /// @name Register information. + /// The sum of numFPDestRegs() and numIntDestRegs() equals + /// numDestRegs(). The former two functions are used to track + /// physical register usage for machines with separate int & FP + /// reg files. + //@{ + /// Number of source registers. + int8_t numSrcRegs() const { return _numSrcRegs; } + /// Number of destination registers. + int8_t numDestRegs() const { return _numDestRegs; } + /// Number of floating-point destination regs. + int8_t numFPDestRegs() const { return _numFPDestRegs; } + /// Number of integer destination regs. + int8_t numIntDestRegs() const { return _numIntDestRegs; } + //@} + + /// @name Flag accessors. + /// These functions are used to access the values of the various + /// instruction property flags. See StaticInstBase::Flags for descriptions + /// of the individual flags. + //@{ + + bool isNop() const { return flags[IsNop]; } + + bool isMemRef() const { return flags[IsMemRef]; } + bool isLoad() const { return flags[IsLoad]; } + bool isStore() const { return flags[IsStore]; } + bool isInstPrefetch() const { return flags[IsInstPrefetch]; } + bool isDataPrefetch() const { return flags[IsDataPrefetch]; } + bool isCopy() const { return flags[IsCopy];} + + bool isInteger() const { return flags[IsInteger]; } + bool isFloating() const { return flags[IsFloating]; } + + bool isControl() const { return flags[IsControl]; } + bool isCall() const { return flags[IsCall]; } + bool isReturn() const { return flags[IsReturn]; } + bool isDirectCtrl() const { return flags[IsDirectControl]; } + bool isIndirectCtrl() const { return flags[IsIndirectControl]; } + bool isCondCtrl() const { return flags[IsCondControl]; } + bool isUncondCtrl() const { return flags[IsUncondControl]; } + + bool isThreadSync() const { return flags[IsThreadSync]; } + bool isSerializing() const { return flags[IsSerializing] || + flags[IsSerializeBefore] || + flags[IsSerializeAfter]; } + bool isSerializeBefore() const { return flags[IsSerializeBefore]; } + bool isSerializeAfter() const { return flags[IsSerializeAfter]; } + bool isMemBarrier() const { return flags[IsMemBarrier]; } + bool isWriteBarrier() const { return flags[IsWriteBarrier]; } + bool isNonSpeculative() const { return flags[IsNonSpeculative]; } + //@} + + /// Operation class. Used to select appropriate function unit in issue. + OpClass opClass() const { return _opClass; } +}; + + +// forward declaration +class StaticInstPtr; + +/** + * Generic yet ISA-dependent static instruction class. + * + * This class builds on StaticInstBase, defining fields and interfaces + * that are generic across all ISAs but that differ in details + * according to the specific ISA being used. + */ +class StaticInst : public StaticInstBase +{ + public: + + /// Binary machine instruction type. + typedef TheISA::MachInst MachInst; + /// Binary extended machine instruction type. + typedef TheISA::ExtMachInst ExtMachInst; + /// Logical register index type. + typedef TheISA::RegIndex RegIndex; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + + /// Return logical index (architectural reg num) of i'th destination reg. + /// Only the entries from 0 through numDestRegs()-1 are valid. + RegIndex destRegIdx(int i) const { return _destRegIdx[i]; } + + /// Return logical index (architectural reg num) of i'th source reg. + /// Only the entries from 0 through numSrcRegs()-1 are valid. + RegIndex srcRegIdx(int i) const { return _srcRegIdx[i]; } + + /// Pointer to a statically allocated "null" instruction object. + /// Used to give eaCompInst() and memAccInst() something to return + /// when called on non-memory instructions. + static StaticInstPtr nullStaticInstPtr; + + /** + * Memory references only: returns "fake" instruction representing + * the effective address part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the EA computation. + */ + virtual const + StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; } + + /** + * Memory references only: returns "fake" instruction representing + * the memory access part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the memory access (not the EA computation). + */ + virtual const + StaticInstPtr &memAccInst() const { return nullStaticInstPtr; } + + /// The binary machine instruction. + const ExtMachInst machInst; + + protected: + + /// See destRegIdx(). + RegIndex _destRegIdx[MaxInstDestRegs]; + /// See srcRegIdx(). + RegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** + * Base mnemonic (e.g., "add"). Used by generateDisassembly() + * methods. Also useful to readily identify instructions from + * within the debugger when #cachedDisassembly has not been + * initialized. + */ + const char *mnemonic; + + /** + * String representation of disassembly (lazily evaluated via + * disassemble()). + */ + mutable std::string *cachedDisassembly; + + /** + * Internal function to generate disassembly string. + */ + virtual std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const = 0; + + /// Constructor. + StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) + : StaticInstBase(__opClass), + machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + { + } + + public: + + virtual ~StaticInst() + { + if (cachedDisassembly) + delete cachedDisassembly; + } + +/** + * The execute() signatures are auto-generated by scons based on the + * set of CPU models we are compiling in today. + */ +#include "cpu/static_inst_exec_sigs.hh" + + /** + * Return the target address for a PC-relative branch. + * Invalid if not a PC-relative branch (i.e. isDirectCtrl() + * should be true). + */ + virtual Addr branchTarget(Addr branchPC) const + { + panic("StaticInst::branchTarget() called on instruction " + "that is not a PC-relative branch."); + } + + /** + * Return the target address for an indirect branch (jump). The + * register value is read from the supplied execution context, so + * the result is valid only if the execution context is about to + * execute the branch in question. Invalid if not an indirect + * branch (i.e. isIndirectCtrl() should be true). + */ + virtual Addr branchTarget(ExecContext *xc) const + { + panic("StaticInst::branchTarget() called on instruction " + "that is not an indirect branch."); + } + + /** + * Return true if the instruction is a control transfer, and if so, + * return the target address as well. + */ + bool hasBranchTarget(Addr pc, ExecContext *xc, Addr &tgt) const; + + /** + * Return string representation of disassembled instruction. + * The default version of this function will call the internal + * virtual generateDisassembly() function to get the string, + * then cache it in #cachedDisassembly. If the disassembly + * should not be cached, this function should be overridden directly. + */ + virtual const std::string &disassemble(Addr pc, + const SymbolTable *symtab = 0) const + { + if (!cachedDisassembly) + cachedDisassembly = + new std::string(generateDisassembly(pc, symtab)); + + return *cachedDisassembly; + } + + /// Decoded instruction cache type. + /// For now we're using a generic hash_map; this seems to work + /// pretty well. + typedef m5::hash_map<ExtMachInst, StaticInstPtr> DecodeCache; + + /// A cache of decoded instruction objects. + static DecodeCache decodeCache; + + /** + * Dump some basic stats on the decode cache hash map. + * Only gets called if DECODE_CACHE_HASH_STATS is defined. + */ + static void dumpDecodeCacheStats(); + + /// Decode a machine instruction. + /// @param mach_inst The binary instruction to decode. + /// @retval A pointer to the corresponding StaticInst object. + //This is defined as inline below. + static StaticInstPtr decode(ExtMachInst mach_inst); + + //MIPS Decoder Debug Functions + int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26 + int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21 + int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16 + int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11 + int getImm() { return (machInst & 0x0000FFFF); } //15...0 + int getFunction(){ return (machInst & 0x0000003F); }//5...0 + int getBranch(){ return (machInst & 0x0000FFFF); }//15...0 + int getJump(){ return (machInst & 0x03FFFFFF); }//5...0 + int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6 + std::string getName() { return mnemonic; } +}; + +typedef RefCountingPtr<StaticInstBase> StaticInstBasePtr; + +/// Reference-counted pointer to a StaticInst object. +/// This type should be used instead of "StaticInst *" so that +/// StaticInst objects can be properly reference-counted. +class StaticInstPtr : public RefCountingPtr<StaticInst> +{ + public: + /// Constructor. + StaticInstPtr() + : RefCountingPtr<StaticInst>() + { + } + + /// Conversion from "StaticInst *". + StaticInstPtr(StaticInst *p) + : RefCountingPtr<StaticInst>(p) + { + } + + /// Copy constructor. + StaticInstPtr(const StaticInstPtr &r) + : RefCountingPtr<StaticInst>(r) + { + } + + /// Construct directly from machine instruction. + /// Calls StaticInst::decode(). + StaticInstPtr(TheISA::ExtMachInst mach_inst) + : RefCountingPtr<StaticInst>(StaticInst::decode(mach_inst)) + { + } + + /// Convert to pointer to StaticInstBase class. + operator const StaticInstBasePtr() + { + return this->get(); + } +}; + +inline StaticInstPtr +StaticInst::decode(StaticInst::ExtMachInst mach_inst) +{ +#ifdef DECODE_CACHE_HASH_STATS + // Simple stats on decode hash_map. Turns out the default + // hash function is as good as anything I could come up with. + const int dump_every_n = 10000000; + static int decodes_til_dump = dump_every_n; + + if (--decodes_til_dump == 0) { + dumpDecodeCacheStats(); + decodes_til_dump = dump_every_n; + } +#endif + + DecodeCache::iterator iter = decodeCache.find(mach_inst); + if (iter != decodeCache.end()) { + return iter->second; + } + + StaticInstPtr si = TheISA::decodeInst(mach_inst); + decodeCache[mach_inst] = si; + return si; +} + +#endif // __CPU_STATIC_INST_HH__ diff --git a/src/cpu/trace/opt_cpu.cc b/src/cpu/trace/opt_cpu.cc new file mode 100644 index 000000000..6cd23b0dd --- /dev/null +++ b/src/cpu/trace/opt_cpu.cc @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a memory trace CPU object for optimal caches. Uses a memory + * trace to access a fully associative cache with optimal replacement. + */ + +#include <algorithm> // For heap functions. + +#include "cpu/trace/opt_cpu.hh" +#include "cpu/trace/reader/mem_trace_reader.hh" + +#include "sim/builder.hh" +#include "sim/sim_events.hh" + +using namespace std; + +OptCPU::OptCPU(const string &name, + MemTraceReader *_trace, + int block_size, + int cache_size, + int _assoc) + : SimObject(name), tickEvent(this), trace(_trace), + numBlks(cache_size/block_size), assoc(_assoc), numSets(numBlks/assoc), + setMask(numSets - 1) +{ + int log_block_size = 0; + int tmp_block_size = block_size; + while (tmp_block_size > 1) { + ++log_block_size; + tmp_block_size = tmp_block_size >> 1; + } + assert(1<<log_block_size == block_size); + MemReqPtr req; + trace->getNextReq(req); + refInfo.resize(numSets); + while (req) { + RefInfo temp; + temp.addr = req->paddr >> log_block_size; + int set = temp.addr & setMask; + refInfo[set].push_back(temp); + trace->getNextReq(req); + } + + // Initialize top level of lookup table. + lookupTable.resize(16); + + // Annotate references with next ref time. + for (int k = 0; k < numSets; ++k) { + for (RefIndex i = refInfo[k].size() - 1; i >= 0; --i) { + Addr addr = refInfo[k][i].addr; + initTable(addr, InfiniteRef); + refInfo[k][i].nextRefTime = lookupValue(addr); + setValue(addr, i); + } + } + + // Reset the lookup table + for (int j = 0; j < 16; ++j) { + if (lookupTable[j].size() == (1<<16)) { + for (int k = 0; k < (1<<16); ++k) { + if (lookupTable[j][k].size() == (1<<16)) { + for (int l = 0; l < (1<<16); ++l) { + lookupTable[j][k][l] = -1; + } + } + } + } + } + + tickEvent.schedule(0); + + hits = 0; + misses = 0; +} + +void +OptCPU::processSet(int set) +{ + // Initialize cache + int blks_in_cache = 0; + RefIndex i = 0; + cacheHeap.clear(); + cacheHeap.resize(assoc); + + while (blks_in_cache < assoc) { + RefIndex cache_index = lookupValue(refInfo[set][i].addr); + if (cache_index == -1) { + // First reference to this block + misses++; + cache_index = blks_in_cache++; + setValue(refInfo[set][i].addr, cache_index); + } else { + hits++; + } + // update cache heap to most recent reference + cacheHeap[cache_index] = i; + if (++i >= refInfo[set].size()) { + return; + } + } + for (int start = assoc/2; start >= 0; --start) { + heapify(set,start); + } + //verifyHeap(set,0); + + for (; i < refInfo[set].size(); ++i) { + RefIndex cache_index = lookupValue(refInfo[set][i].addr); + if (cache_index == -1) { + // miss + misses++; + // replace from cacheHeap[0] + // mark replaced block as absent + setValue(refInfo[set][cacheHeap[0]].addr, -1); + setValue(refInfo[set][i].addr, 0); + cacheHeap[0] = i; + heapify(set, 0); + // Make sure its in the cache + assert(lookupValue(refInfo[set][i].addr) != -1); + } else { + // hit + hits++; + assert(refInfo[set][cacheHeap[cache_index]].addr == + refInfo[set][i].addr); + assert(refInfo[set][cacheHeap[cache_index]].nextRefTime == i); + assert(heapLeft(cache_index) >= assoc); + + cacheHeap[cache_index] = i; + processRankIncrease(set, cache_index); + assert(lookupValue(refInfo[set][i].addr) != -1); + } + } +} +void +OptCPU::tick() +{ + // Do opt simulation + + int references = 0; + for (int set = 0; set < numSets; ++set) { + if (!refInfo[set].empty()) { + processSet(set); + } + references += refInfo[set].size(); + } + // exit; + fprintf(stderr,"sys.cpu.misses %d #opt cache misses\n",misses); + fprintf(stderr,"sys.cpu.hits %d #opt cache hits\n", hits); + fprintf(stderr,"sys.cpu.accesses %d #opt cache acceses\n", references); + new SimExitEvent("Finshed Memory Trace"); +} + +void +OptCPU::initTable(Addr addr, RefIndex index) +{ + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + assert(l1_index == addr >> 32); + if (lookupTable[l1_index].size() != (1<<16)) { + lookupTable[l1_index].resize(1<<16); + } + if (lookupTable[l1_index][l2_index].size() != (1<<16)) { + lookupTable[l1_index][l2_index].resize(1<<16, index); + } +} + +OptCPU::TickEvent::TickEvent(OptCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +void +OptCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +OptCPU::TickEvent::description() +{ + return "OptCPU tick event"; +} + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OptCPU) + + SimObjectParam<MemTraceReader *> data_trace; + Param<int> size; + Param<int> block_size; +Param<int> assoc; + +END_DECLARE_SIM_OBJECT_PARAMS(OptCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OptCPU) + + INIT_PARAM_DFLT(data_trace, "memory trace", NULL), + INIT_PARAM(size, "cache size"), + INIT_PARAM(block_size, "block size"), + INIT_PARAM(assoc,"associativity") + +END_INIT_SIM_OBJECT_PARAMS(OptCPU) + +CREATE_SIM_OBJECT(OptCPU) +{ + return new OptCPU(getInstanceName(), + data_trace, + block_size, + size, + assoc); +} + +REGISTER_SIM_OBJECT("OptCPU", OptCPU) diff --git a/src/cpu/trace/opt_cpu.hh b/src/cpu/trace/opt_cpu.hh new file mode 100644 index 000000000..f81691733 --- /dev/null +++ b/src/cpu/trace/opt_cpu.hh @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object for optimal caches. Uses a memory + * trace to access a fully associative cache with optimal replacement. + */ + +#ifndef __CPU_TRACE_OPT_CPU_HH__ +#define __CPU_TRACE_OPT_CPU_HH__ + +#include <vector> + +#include "mem/mem_req.hh" // for MemReqPtr +#include "sim/eventq.hh" // for Event +#include "sim/sim_object.hh" + +// Forward Declaration +class MemTraceReader; + +/** + * A CPU object to simulate a fully-associative cache with optimal replacement. + */ +class OptCPU : public SimObject +{ + private: + typedef int RefIndex; + + typedef std::vector<RefIndex> L3Table; + typedef std::vector<L3Table> L2Table; + typedef std::vector<L2Table> L1Table; + + /** + * Event to call OptCPU::tick + */ + class TickEvent : public Event + { + private: + /** The associated CPU */ + OptCPU *cpu; + + public: + /** + * Construct this event; + */ + TickEvent(OptCPU *c); + + /** + * Call the tick function. + */ + void process(); + + /** + * Return a string description of this event. + */ + const char *description(); + }; + + TickEvent tickEvent; + + class RefInfo + { + public: + RefIndex nextRefTime; + Addr addr; + }; + + /** Reference Information, per set. */ + std::vector<std::vector<RefInfo> > refInfo; + + /** Lookup table to track blocks in the cache heap */ + L1Table lookupTable; + + /** + * Return the correct value in the lookup table. + */ + RefIndex lookupValue(Addr addr) + { + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + int l3_index = addr & 0xffff; + assert(l1_index == addr >> 32); + return lookupTable[l1_index][l2_index][l3_index]; + } + + /** + * Set the value in the lookup table. + */ + void setValue(Addr addr, RefIndex index) + { + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + int l3_index = addr & 0xffff; + assert(l1_index == addr >> 32); + lookupTable[l1_index][l2_index][l3_index]=index; + } + + /** + * Initialize the lookup table to the given value. + */ + void initTable(Addr addr, RefIndex index); + + void heapSwap(int set, int a, int b) { + RefIndex tmp = cacheHeap[a]; + cacheHeap[a] = cacheHeap[b]; + cacheHeap[b] = tmp; + + setValue(refInfo[set][cacheHeap[a]].addr, a); + setValue(refInfo[set][cacheHeap[b]].addr, b); + } + + int heapLeft(int index) { return index + index + 1; } + int heapRight(int index) { return index + index + 2; } + int heapParent(int index) { return (index - 1) >> 1; } + + RefIndex heapRank(int set, int index) { + return refInfo[set][cacheHeap[index]].nextRefTime; + } + + void heapify(int set, int start){ + int left = heapLeft(start); + int right = heapRight(start); + int max = start; + if (left < assoc && heapRank(set, left) > heapRank(set, start)) { + max = left; + } + if (right < assoc && heapRank(set, right) > heapRank(set, max)) { + max = right; + } + + if (max != start) { + heapSwap(set, start, max); + heapify(set, max); + } + } + + void verifyHeap(int set, int start) { + int left = heapLeft(start); + int right = heapRight(start); + + if (left < assoc) { + assert(heapRank(set, start) >= heapRank(set, left)); + verifyHeap(set, left); + } + if (right < assoc) { + assert(heapRank(set, start) >= heapRank(set, right)); + verifyHeap(set, right); + } + } + + void processRankIncrease(int set, int start) { + int parent = heapParent(start); + while (start > 0 && heapRank(set,parent) < heapRank(set,start)) { + heapSwap(set, parent, start); + start = parent; + parent = heapParent(start); + } + } + + void processSet(int set); + + static const RefIndex InfiniteRef = 0x7fffffff; + + /** Memory reference trace. */ + MemTraceReader *trace; + + /** Cache heap for replacement. */ + std::vector<RefIndex> cacheHeap; + + /** The number of blocks in the cache. */ + const int numBlks; + + const int assoc; + const int numSets; + const int setMask; + + + int misses; + int hits; + + public: + /** + * Construct a OptCPU object. + */ + OptCPU(const std::string &name, + MemTraceReader *_trace, + int block_size, + int cache_size, + int assoc); + + /** + * Perform the optimal replacement simulation. + */ + void tick(); +}; + +#endif // __CPU_TRACE_OPT_CPU_HH__ diff --git a/src/cpu/trace/reader/ibm_reader.cc b/src/cpu/trace/reader/ibm_reader.cc new file mode 100644 index 000000000..420101b63 --- /dev/null +++ b/src/cpu/trace/reader/ibm_reader.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a IBM memory trace format reader. + */ +#include <sstream> + +#include "cpu/trace/reader/ibm_reader.hh" +#include "sim/builder.hh" +#include "base/misc.hh" // for fatal + +using namespace std; + +IBMReader::IBMReader(const string &name, const string &filename) + : MemTraceReader(name) +{ + if (strcmp((filename.c_str() + filename.length() -3), ".gz") == 0) { + // Compressed file, need to use a pipe to gzip. + stringstream buf; + buf << "gzip -d -c " << filename << endl; + trace = popen(buf.str().c_str(), "r"); + } else { + trace = fopen(filename.c_str(), "rb"); + } + if (!trace) { + fatal("Can't open file %s", filename); + } +} + +Tick +IBMReader::getNextReq(MemReqPtr &req) +{ + MemReqPtr tmp_req; + + int c = getc(trace); + if (c != EOF) { + tmp_req = new MemReq(); + //int cpu_id = (c & 0xf0) >> 4; + int type = c & 0x0f; + // We have L1 miss traces, so all accesses are 128 bytes + tmp_req->size = 128; + + tmp_req->paddr = 0; + for (int i = 2; i >= 0; --i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of file"); + } + tmp_req->paddr |= ((c & 0xff) << (8 * i)); + } + tmp_req->paddr = tmp_req->paddr << 7; + + switch(type) { + case IBM_COND_EXCLUSIVE_FETCH: + case IBM_READ_ONLY_FETCH: + tmp_req->cmd = Read; + break; + case IBM_EXCLUSIVE_FETCH: + case IBM_FETCH_NO_DATA: + tmp_req->cmd = Write; + break; + case IBM_INST_FETCH: + tmp_req->cmd = Read; + break; + default: + fatal("Unknown trace entry type."); + } + + } + req = tmp_req; + return 0; +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(IBMReader) + + Param<string> filename; + +END_DECLARE_SIM_OBJECT_PARAMS(IBMReader) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(IBMReader) + + INIT_PARAM(filename, "trace file") + +END_INIT_SIM_OBJECT_PARAMS(IBMReader) + + +CREATE_SIM_OBJECT(IBMReader) +{ + return new IBMReader(getInstanceName(), filename); +} + +REGISTER_SIM_OBJECT("IBMReader", IBMReader) diff --git a/src/cpu/trace/reader/ibm_reader.hh b/src/cpu/trace/reader/ibm_reader.hh new file mode 100644 index 000000000..ce29206a2 --- /dev/null +++ b/src/cpu/trace/reader/ibm_reader.hh @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a IBM memory trace format reader. + */ + +#ifndef __IBM_READER_HH__ +#define __IBM_READER_HH__ + +#include <stdio.h> +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "mem/mem_req.hh" + +/** + * A memory trace reader for the IBM memory trace format. + */ +class IBMReader : public MemTraceReader +{ + /** IBM trace file. */ + FILE* trace; + + enum IBMType { + IBM_INST_FETCH, + IBM_READ_ONLY_FETCH, + IBM_COND_EXCLUSIVE_FETCH, + IBM_EXCLUSIVE_FETCH, + IBM_FETCH_NO_DATA + }; + + public: + /** + * Construct an IBMReader. + */ + IBMReader(const std::string &name, const std::string &filename); + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return IBM traces don't store timing information, return 0 + */ + virtual Tick getNextReq(MemReqPtr &req); +}; + +#endif //__IBM_READER_HH__ + diff --git a/src/cpu/trace/reader/itx_reader.cc b/src/cpu/trace/reader/itx_reader.cc new file mode 100644 index 000000000..39ba27393 --- /dev/null +++ b/src/cpu/trace/reader/itx_reader.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a Intel ITX memory trace format reader. + */ +#include <sstream> + +#include "cpu/trace/reader/itx_reader.hh" +#include "sim/builder.hh" +#include "base/misc.hh" // for fatal + +using namespace std; + +ITXReader::ITXReader(const string &name, const string &filename) + : MemTraceReader(name) +{ + if (strcmp((filename.c_str() + filename.length() -3), ".gz") == 0) { + // Compressed file, need to use a pipe to gzip. + stringstream buf; + buf << "gzip -d -c " << filename << endl; + trace = popen(buf.str().c_str(), "r"); + } else { + trace = fopen(filename.c_str(), "rb"); + } + if (!trace) { + fatal("Can't open file %s", filename); + } + traceFormat = 0; + int c; + for (int i = 0; i < 4; ++i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + traceFormat |= (c & 0xff) << (8 * i); + } + if (traceFormat > 2) + fatal("Invalid trace format."); +} + +Tick +ITXReader::getNextReq(MemReqPtr &req) +{ + MemReqPtr tmp_req = new MemReq(); + bool phys_val; + do { + int c = getc(trace); + if (c != EOF) { + // Decode first byte + // phys_val<1> | type <2:0> | size <3:0> + phys_val = c & 0x80; + tmp_req->size = (c & 0x0f) + 1; + int type = (c & 0x70) >> 4; + + // Could be a compressed instruction entry, expand if necessary + if (type == ITXCodeComp) { + if (traceFormat != 2) { + fatal("Compressed code entry in non CompCode trace."); + } + if (!codeVirtValid) { + fatal("Corrupt CodeComp entry."); + } + + tmp_req->vaddr = codeVirtAddr; + codeVirtAddr += tmp_req->size; + if (phys_val) { + if (!codePhysValid) { + fatal("Corrupt CodeComp entry."); + } + tmp_req->paddr = codePhysAddr; + if (((tmp_req->paddr & 0xfff) + tmp_req->size) & ~0xfff) { + // Crossed page boundary, next physical address is + // invalid + codePhysValid = false; + } else { + codePhysAddr += tmp_req->size; + } + assert(tmp_req->paddr >> 36 == 0); + } else { + codePhysValid = false; + } + type = ITXCode; + tmp_req->cmd = Read; + } else { + // Normal entry + tmp_req->vaddr = 0; + for (int i = 0; i < 4; ++i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + tmp_req->vaddr |= (c & 0xff) << (8 * i); + } + if (type == ITXCode) { + codeVirtAddr = tmp_req->vaddr + tmp_req->size; + codeVirtValid = true; + } + tmp_req->paddr = 0; + if (phys_val) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + // Get the page offset from the virtual address. + tmp_req->paddr = tmp_req->vaddr & 0xfff; + tmp_req->paddr |= (c & 0xf0) << 8; + tmp_req->paddr |= (Addr)(c & 0x0f) << 32; + for (int i = 2; i < 4; ++i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + tmp_req->paddr |= (Addr)(c & 0xff) << (8 * i); + } + if (type == ITXCode) { + if (((tmp_req->paddr & 0xfff) + tmp_req->size) + & ~0xfff) { + // Crossing the page boundary, next physical + // address isn't valid + codePhysValid = false; + } else { + codePhysAddr = tmp_req->paddr + tmp_req->size; + codePhysValid = true; + } + } + assert(tmp_req->paddr >> 36 == 0); + } else if (type == ITXCode) { + codePhysValid = false; + } + switch(type) { + case ITXRead: + tmp_req->cmd = Read; + break; + case ITXWrite: + tmp_req->cmd = Write; + break; + case ITXWriteback: + tmp_req->cmd = Writeback; + break; + case ITXCode: + tmp_req->cmd = Read; + tmp_req->flags |= INST_READ; + break; + default: + fatal("Unknown ITX type"); + } + } + } else { + // EOF need to return a null request + MemReqPtr null_req; + req = null_req; + return 0; + } + } while (!phys_val); + req = tmp_req; + assert(!req || (req->paddr >> 36) == 0); + return 0; +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(ITXReader) + + Param<string> filename; + +END_DECLARE_SIM_OBJECT_PARAMS(ITXReader) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(ITXReader) + + INIT_PARAM(filename, "trace file") + +END_INIT_SIM_OBJECT_PARAMS(ITXReader) + + +CREATE_SIM_OBJECT(ITXReader) +{ + return new ITXReader(getInstanceName(), filename); +} + +REGISTER_SIM_OBJECT("ITXReader", ITXReader) diff --git a/src/cpu/trace/reader/itx_reader.hh b/src/cpu/trace/reader/itx_reader.hh new file mode 100644 index 000000000..a16a08085 --- /dev/null +++ b/src/cpu/trace/reader/itx_reader.hh @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a Intel ITX memory trace format reader. + */ + +#ifndef __ITX_READER_HH__ +#define __ITX_READER_HH__ + +#include <stdio.h> +#include <string> + +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "mem/mem_req.hh" + + +/** + * A memory trace reader for the Intel ITX memory trace format. + */ +class ITXReader : public MemTraceReader +{ + private: + /** Trace file. */ + FILE *trace; + + bool codeVirtValid; + Addr codeVirtAddr; + bool codePhysValid; + Addr codePhysAddr; + + int traceFormat; + + enum ITXType { + ITXRead, + ITXWrite, + ITXWriteback, + ITXCode, + ITXCodeComp + }; + + public: + /** + * Construct an ITXReader. + */ + ITXReader(const std::string &name, const std::string &filename); + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return ITX traces don't store timing information, return 0 + */ + virtual Tick getNextReq(MemReqPtr &req); +}; + +#endif //__ITX_READER_HH__ + diff --git a/src/cpu/trace/reader/m5_reader.cc b/src/cpu/trace/reader/m5_reader.cc new file mode 100644 index 000000000..ce44672f2 --- /dev/null +++ b/src/cpu/trace/reader/m5_reader.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace reader for a M5 memory trace. + */ + +#include "cpu/trace/reader/m5_reader.hh" +#include "mem/trace/m5_format.hh" +#include "mem/mem_cmd.hh" +#include "sim/builder.hh" + +using namespace std; + +M5Reader::M5Reader(const string &name, const string &filename) + : MemTraceReader(name) +{ + traceFile.open(filename.c_str(), ios::binary); +} + +Tick +M5Reader::getNextReq(MemReqPtr &req) +{ + M5Format ref; + + MemReqPtr tmp_req; + // Need to read EOF char before eof() will return true. + traceFile.read((char*) &ref, sizeof(ref)); + if (!traceFile.eof()) { + //traceFile.read((char*) &ref, sizeof(ref)); +#ifndef NDEBUG + int gcount = traceFile.gcount(); + assert(gcount != 0 || traceFile.eof()); + assert(gcount == sizeof(ref)); + assert(ref.cmd < 12); +#endif + tmp_req = new MemReq(); + tmp_req->paddr = ref.paddr; + tmp_req->asid = ref.asid; + // Assume asid == thread_num + tmp_req->thread_num = ref.asid; + tmp_req->cmd = (MemCmdEnum)ref.cmd; + tmp_req->size = ref.size; + tmp_req->dest = ref.dest; + } else { + ref.cycle = 0; + } + req = tmp_req; + return ref.cycle; +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(M5Reader) + + Param<string> filename; + +END_DECLARE_SIM_OBJECT_PARAMS(M5Reader) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(M5Reader) + + INIT_PARAM(filename, "trace file") + +END_INIT_SIM_OBJECT_PARAMS(M5Reader) + + +CREATE_SIM_OBJECT(M5Reader) +{ + return new M5Reader(getInstanceName(), filename); +} + +REGISTER_SIM_OBJECT("M5Reader", M5Reader) diff --git a/src/cpu/trace/reader/m5_reader.hh b/src/cpu/trace/reader/m5_reader.hh new file mode 100644 index 000000000..974a83ffa --- /dev/null +++ b/src/cpu/trace/reader/m5_reader.hh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a memory trace reader for a M5 memory trace. + */ + +#ifndef __M5_READER_HH__ +#define __M5_READER_HH__ + +#include <fstream> + +#include "cpu/trace/reader/mem_trace_reader.hh" + +/** + * A memory trace reader for an M5 memory trace. @sa M5Writer. + */ +class M5Reader : public MemTraceReader +{ + /** The traceFile. */ + std::ifstream traceFile; + + std::string fn; + + public: + /** + * Construct an M5 memory trace reader. + */ + M5Reader(const std::string &name, const std::string &filename); + + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return The cycle the reference was started. + */ + virtual Tick getNextReq(MemReqPtr &req); +}; + +#endif // __M5_READER_HH__ diff --git a/src/cpu/trace/reader/mem_trace_reader.cc b/src/cpu/trace/reader/mem_trace_reader.cc new file mode 100644 index 000000000..769f0be27 --- /dev/null +++ b/src/cpu/trace/reader/mem_trace_reader.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * SimObject Declaration of pure virtual MemTraceReader class. + */ + +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "sim/param.hh" + +DEFINE_SIM_OBJECT_CLASS_NAME("MemTraceReader", MemTraceReader); diff --git a/src/cpu/trace/reader/mem_trace_reader.hh b/src/cpu/trace/reader/mem_trace_reader.hh new file mode 100644 index 000000000..b433cdbdd --- /dev/null +++ b/src/cpu/trace/reader/mem_trace_reader.hh @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Definitions for a pure virtual interface to a memory trace reader. + */ + +#ifndef __MEM_TRACE_READER_HH__ +#define __MEM_TRACE_READER_HH__ + +#include "sim/sim_object.hh" +#include "mem/mem_req.hh" // For MemReqPtr + +/** + * Pure virtual base class for memory trace readers. + */ +class MemTraceReader : public SimObject +{ + public: + /** Construct this MemoryTrace reader. */ + MemTraceReader(const std::string &name) : SimObject(name) {} + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return The cycle of the request, 0 if none in trace. + */ + virtual Tick getNextReq(MemReqPtr &req) = 0; +}; + +#endif //__MEM_TRACE_READER_HH__ diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc new file mode 100644 index 000000000..20d0a567f --- /dev/null +++ b/src/cpu/trace/trace_cpu.cc @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object. Uses a memory trace to drive the + * provided memory hierarchy. + */ + +#include <algorithm> // For min + +#include "cpu/trace/trace_cpu.hh" +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "mem/base_mem.hh" // For PARAM constructor +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" + +using namespace std; + +TraceCPU::TraceCPU(const string &name, + MemInterface *icache_interface, + MemInterface *dcache_interface, + MemTraceReader *data_trace) + : SimObject(name), icacheInterface(icache_interface), + dcacheInterface(dcache_interface), + dataTrace(data_trace), outstandingRequests(0), tickEvent(this) +{ + assert(dcacheInterface); + nextCycle = dataTrace->getNextReq(nextReq); + tickEvent.schedule(0); +} + +void +TraceCPU::tick() +{ + assert(outstandingRequests >= 0); + assert(outstandingRequests < 1000); + int instReqs = 0; + int dataReqs = 0; + + while (nextReq && curTick >= nextCycle) { + assert(nextReq->thread_num < 4 && "Not enough threads"); + if (nextReq->isInstRead() && icacheInterface) { + if (icacheInterface->isBlocked()) + break; + + nextReq->time = curTick; + if (nextReq->cmd == Squash) { + icacheInterface->squash(nextReq->asid); + } else { + ++instReqs; + if (icacheInterface->doEvents()) { + nextReq->completionEvent = + new TraceCompleteEvent(nextReq, this); + icacheInterface->access(nextReq); + } else { + icacheInterface->access(nextReq); + completeRequest(nextReq); + } + } + } else { + if (dcacheInterface->isBlocked()) + break; + + ++dataReqs; + nextReq->time = curTick; + if (dcacheInterface->doEvents()) { + nextReq->completionEvent = + new TraceCompleteEvent(nextReq, this); + dcacheInterface->access(nextReq); + } else { + dcacheInterface->access(nextReq); + completeRequest(nextReq); + } + + } + nextCycle = dataTrace->getNextReq(nextReq); + } + + if (!nextReq) { + // No more requests to send. Finish trailing events and exit. + if (mainEventQueue.empty()) { + new SimExitEvent("Finshed Memory Trace"); + } else { + tickEvent.schedule(mainEventQueue.nextEventTime() + cycles(1)); + } + } else { + tickEvent.schedule(max(curTick + cycles(1), nextCycle)); + } +} + +void +TraceCPU::completeRequest(MemReqPtr& req) +{ +} + +void +TraceCompleteEvent::process() +{ + tester->completeRequest(req); +} + +const char * +TraceCompleteEvent::description() +{ + return "trace access complete"; +} + +TraceCPU::TickEvent::TickEvent(TraceCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +void +TraceCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +TraceCPU::TickEvent::description() +{ + return "TraceCPU tick event"; +} + + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(TraceCPU) + + SimObjectParam<BaseMem *> icache; + SimObjectParam<BaseMem *> dcache; + SimObjectParam<MemTraceReader *> data_trace; + +END_DECLARE_SIM_OBJECT_PARAMS(TraceCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(TraceCPU) + + INIT_PARAM_DFLT(icache, "instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "data cache", NULL), + INIT_PARAM_DFLT(data_trace, "data trace", NULL) + +END_INIT_SIM_OBJECT_PARAMS(TraceCPU) + +CREATE_SIM_OBJECT(TraceCPU) +{ + return new TraceCPU(getInstanceName(), + (icache) ? icache->getInterface() : NULL, + (dcache) ? dcache->getInterface() : NULL, + data_trace); +} + +REGISTER_SIM_OBJECT("TraceCPU", TraceCPU) + diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh new file mode 100644 index 000000000..69ca35321 --- /dev/null +++ b/src/cpu/trace/trace_cpu.hh @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object. Uses a memory trace to drive the + * provided memory hierarchy. + */ + +#ifndef __CPU_TRACE_TRACE_CPU_HH__ +#define __CPU_TRACE_TRACE_CPU_HH__ + +#include <string> + +#include "mem/mem_req.hh" // for MemReqPtr +#include "sim/eventq.hh" // for Event +#include "sim/sim_object.hh" + +// Forward declaration. +class MemInterface; +class MemTraceReader; + +/** + * A cpu object for running memory traces through a memory hierarchy. + */ +class TraceCPU : public SimObject +{ + private: + /** Interface for instruction trace requests, if any. */ + MemInterface *icacheInterface; + /** Interface for data trace requests, if any. */ + MemInterface *dcacheInterface; + + /** Data reference trace. */ + MemTraceReader *dataTrace; + + /** Number of outstanding requests. */ + int outstandingRequests; + + /** Cycle of the next request, 0 if not available. */ + Tick nextCycle; + + /** Next request. */ + MemReqPtr nextReq; + + /** + * Event to call the TraceCPU::tick + */ + class TickEvent : public Event + { + private: + /** The associated CPU */ + TraceCPU *cpu; + + public: + /** + * Construct this event; + */ + TickEvent(TraceCPU *c); + + /** + * Call the tick function. + */ + void process(); + + /** + * Return a string description of this event. + */ + const char *description(); + }; + + TickEvent tickEvent; + + public: + /** + * Construct a TraceCPU object. + */ + TraceCPU(const std::string &name, + MemInterface *icache_interface, + MemInterface *dcache_interface, + MemTraceReader *data_trace); + + inline Tick cycles(int numCycles) { return numCycles; } + + /** + * Perform all the accesses for one cycle. + */ + void tick(); + + /** + * Handle a completed memory request. + */ + void completeRequest(MemReqPtr &req); +}; + +class TraceCompleteEvent : public Event +{ + MemReqPtr req; + TraceCPU *tester; + + public: + + TraceCompleteEvent(MemReqPtr &_req, TraceCPU *_tester) + : Event(&mainEventQueue), req(_req), tester(_tester) + { + setFlags(AutoDelete); + } + + void process(); + + virtual const char *description(); +}; + +#endif // __CPU_TRACE_TRACE_CPU_HH__ + |