From 26ca8b87470912d5e593a21fc968dd2ddf0e20b2 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Fri, 10 Feb 2012 09:51:37 -0600 Subject: Regressions: Update stats due to O3 CPU changes --- .../se/10.mcf/ref/arm/linux/o3-timing/config.ini | 44 +++- .../long/se/10.mcf/ref/arm/linux/o3-timing/simout | 12 +- .../se/10.mcf/ref/arm/linux/o3-timing/stats.txt | 250 ++++++++++----------- 3 files changed, 170 insertions(+), 136 deletions(-) (limited to 'tests/long/se/10.mcf/ref/arm') diff --git a/tests/long/se/10.mcf/ref/arm/linux/o3-timing/config.ini b/tests/long/se/10.mcf/ref/arm/linux/o3-timing/config.ini index bec9490f3..cbe079647 100644 --- a/tests/long/se/10.mcf/ref/arm/linux/o3-timing/config.ini +++ b/tests/long/se/10.mcf/ref/arm/linux/o3-timing/config.ini @@ -1,6 +1,7 @@ [root] type=Root children=system +full_system=false time_sync_enable=false time_sync_period=100000000000 time_sync_spin_threshold=100000000 @@ -8,10 +9,16 @@ time_sync_spin_threshold=100000000 [system] type=System children=cpu membus physmem +boot_osflags=a +init_param=0 +kernel= +load_addr_mask=1099511627775 mem_mode=atomic memories=system.physmem num_work_ids=16 physmem=system.physmem +readfile= +symbolfile= work_begin_ckpt_count=0 work_begin_cpu_id_exit=-1 work_begin_exit_count=0 @@ -23,7 +30,7 @@ system_port=system.membus.port[0] [system.cpu] type=DerivO3CPU -children=dcache dtb fuPool icache itb l2cache toL2Bus tracer workload +children=dcache dtb fuPool icache interrupts itb l2cache toL2Bus tracer workload BTBEntries=4096 BTBTagSize=16 LFSTSize=1024 @@ -52,6 +59,7 @@ decodeWidth=8 defer_registration=false dispatchWidth=8 do_checkpoint_insts=true +do_quiesce=true do_statistics_insts=true dtb=system.cpu.dtb fetchToDecodeDelay=1 @@ -69,6 +77,7 @@ iewToDecodeDelay=1 iewToFetchDelay=1 iewToRenameDelay=1 instShiftAmt=2 +interrupts=system.cpu.interrupts issueToExecuteDelay=1 issueWidth=8 itb=system.cpu.itb @@ -80,6 +89,7 @@ max_insts_all_threads=0 max_insts_any_thread=0 max_loads_all_threads=0 max_loads_any_thread=0 +needsTSO=false numIQEntries=64 numPhysFloatRegs=256 numPhysIntRegs=256 @@ -88,6 +98,7 @@ numRobs=1 numThreads=1 phase=0 predType=tournament +profile=0 progress_interval=0 renameToDecodeDelay=1 renameToFetchDelay=1 @@ -148,7 +159,16 @@ mem_side=system.cpu.toL2Bus.port[1] [system.cpu.dtb] type=ArmTLB +children=walker size=64 +walker=system.cpu.dtb.walker + +[system.cpu.dtb.walker] +type=ArmTableWalker +max_backoff=100000 +min_backoff=0 +sys=system +port=system.cpu.toL2Bus.port[3] [system.cpu.fuPool] type=FUPool @@ -445,9 +465,21 @@ write_buffers=8 cpu_side=system.cpu.icache_port mem_side=system.cpu.toL2Bus.port[0] +[system.cpu.interrupts] +type=ArmInterrupts + [system.cpu.itb] type=ArmTLB +children=walker size=64 +walker=system.cpu.itb.walker + +[system.cpu.itb.walker] +type=ArmTableWalker +max_backoff=100000 +min_backoff=0 +sys=system +port=system.cpu.toL2Bus.port[2] [system.cpu.l2cache] type=BaseCache @@ -478,7 +510,7 @@ tgts_per_mshr=5 trace_addr=0 two_queue=false write_buffers=8 -cpu_side=system.cpu.toL2Bus.port[2] +cpu_side=system.cpu.toL2Bus.port[4] mem_side=system.membus.port[2] [system.cpu.toL2Bus] @@ -489,7 +521,7 @@ clock=1000 header_cycles=1 use_default_range=false width=64 -port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.itb.walker.port system.cpu.dtb.walker.port system.cpu.l2cache.cpu_side [system.cpu.tracer] type=ExeTracer @@ -497,14 +529,14 @@ type=ExeTracer [system.cpu.workload] type=LiveProcess cmd=mcf mcf.in -cwd=build/ARM_SE/tests/opt/long/10.mcf/arm/linux/o3-timing +cwd=build/ARM/tests/opt/long/se/10.mcf/arm/linux/o3-timing egid=100 env= errout=cerr euid=100 -executable=/dist/m5/cpu2000/binaries/arm/linux/mcf +executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/arm/linux/mcf gid=100 -input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in +input=/scratch/nilay/GEM5/dist/m5/cpu2000/data/mcf/smred/input/mcf.in max_stack_size=67108864 output=cout pid=100 diff --git a/tests/long/se/10.mcf/ref/arm/linux/o3-timing/simout b/tests/long/se/10.mcf/ref/arm/linux/o3-timing/simout index db74d3d24..3ae44ae93 100755 --- a/tests/long/se/10.mcf/ref/arm/linux/o3-timing/simout +++ b/tests/long/se/10.mcf/ref/arm/linux/o3-timing/simout @@ -1,10 +1,12 @@ +Redirecting stdout to build/ARM/tests/opt/long/se/10.mcf/arm/linux/o3-timing/simout +Redirecting stderr to build/ARM/tests/opt/long/se/10.mcf/arm/linux/o3-timing/simerr gem5 Simulator System. http://gem5.org gem5 is copyrighted software; use the --copyright option for details. -gem5 compiled Jan 23 2012 04:16:21 -gem5 started Jan 23 2012 08:43:41 -gem5 executing on zizzer -command line: build/ARM_SE/gem5.opt -d build/ARM_SE/tests/opt/long/10.mcf/arm/linux/o3-timing -re tests/run.py build/ARM_SE/tests/opt/long/10.mcf/arm/linux/o3-timing +gem5 compiled Feb 10 2012 00:18:03 +gem5 started Feb 10 2012 00:18:22 +gem5 executing on ribera.cs.wisc.edu +command line: build/ARM/gem5.opt -d build/ARM/tests/opt/long/se/10.mcf/arm/linux/o3-timing -re tests/run.py build/ARM/tests/opt/long/se/10.mcf/arm/linux/o3-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... @@ -23,4 +25,4 @@ simplex iterations : 2663 flow value : 3080014995 checksum : 68389 optimal -Exiting @ tick 33080569000 because target called exit() +Exiting @ tick 33080570000 because target called exit() diff --git a/tests/long/se/10.mcf/ref/arm/linux/o3-timing/stats.txt b/tests/long/se/10.mcf/ref/arm/linux/o3-timing/stats.txt index 190781128..833e2ce53 100644 --- a/tests/long/se/10.mcf/ref/arm/linux/o3-timing/stats.txt +++ b/tests/long/se/10.mcf/ref/arm/linux/o3-timing/stats.txt @@ -1,13 +1,13 @@ ---------- Begin Simulation Statistics ---------- sim_seconds 0.033081 # Number of seconds simulated -sim_ticks 33080569000 # Number of ticks simulated -final_tick 33080569000 # Number of ticks from beginning of simulation (restored from checkpoints and never reset) +sim_ticks 33080570000 # Number of ticks simulated +final_tick 33080570000 # Number of ticks from beginning of simulation (restored from checkpoints and never reset) sim_freq 1000000000000 # Frequency of simulated ticks -host_inst_rate 140676 # Simulator instruction rate (inst/s) -host_tick_rate 50998874 # Simulator tick rate (ticks/s) -host_mem_usage 353196 # Number of bytes of host memory used -host_seconds 648.65 # Real time elapsed on the host +host_inst_rate 45520 # Simulator instruction rate (inst/s) +host_tick_rate 16502276 # Simulator tick rate (ticks/s) +host_mem_usage 388968 # Number of bytes of host memory used +host_seconds 2004.61 # Real time elapsed on the host sim_insts 91249885 # Number of instructions simulated system.physmem.bytes_read 997440 # Number of bytes read from this memory system.physmem.bytes_inst_read 44864 # Number of instructions bytes read from this memory @@ -15,10 +15,10 @@ system.physmem.bytes_written 2048 # Nu system.physmem.num_reads 15585 # Number of read requests responded to by this memory system.physmem.num_writes 32 # Number of write requests responded to by this memory system.physmem.num_other 0 # Number of other requests responded to by this memory -system.physmem.bw_read 30151839 # Total read bandwidth from this memory (bytes/s) +system.physmem.bw_read 30151838 # Total read bandwidth from this memory (bytes/s) system.physmem.bw_inst_read 1356204 # Instruction read bandwidth from this memory (bytes/s) system.physmem.bw_write 61909 # Write bandwidth from this memory (bytes/s) -system.physmem.bw_total 30213749 # Total bandwidth to/from this memory (bytes/s) +system.physmem.bw_total 30213748 # Total bandwidth to/from this memory (bytes/s) system.cpu.dtb.inst_hits 0 # ITB inst hits system.cpu.dtb.inst_misses 0 # ITB inst misses system.cpu.dtb.read_hits 0 # DTB read hits @@ -62,7 +62,7 @@ system.cpu.itb.hits 0 # DT system.cpu.itb.misses 0 # DTB misses system.cpu.itb.accesses 0 # DTB accesses system.cpu.workload.num_syscalls 442 # Number of system calls -system.cpu.numCycles 66161139 # number of cpu cycles simulated +system.cpu.numCycles 66161141 # number of cpu cycles simulated system.cpu.numWorkItemsStarted 0 # number of work items this cpu started system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed system.cpu.BPredUnit.lookups 27503856 # Number of BP lookups @@ -73,95 +73,95 @@ system.cpu.BPredUnit.BTBHits 23511296 # Nu system.cpu.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. system.cpu.BPredUnit.usedRAS 109835 # Number of times the RAS was used to get a target. system.cpu.BPredUnit.RASInCorrect 10070 # Number of incorrect RAS predictions. -system.cpu.fetch.icacheStallCycles 15373276 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.Insts 131330352 # Number of instructions fetch has processed +system.cpu.fetch.icacheStallCycles 15373267 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.Insts 131330347 # Number of instructions fetch has processed system.cpu.fetch.Branches 27503856 # Number of branches that fetch encountered system.cpu.fetch.predictedBranches 23621131 # Number of branches that fetch has predicted taken -system.cpu.fetch.Cycles 32575580 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.SquashCycles 5466802 # Number of cycles fetch has spent squashing -system.cpu.fetch.BlockedCycles 14146451 # Number of cycles fetch has spent blocked +system.cpu.fetch.Cycles 32575588 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.SquashCycles 5466804 # Number of cycles fetch has spent squashing +system.cpu.fetch.BlockedCycles 14146452 # Number of cycles fetch has spent blocked system.cpu.fetch.MiscStallCycles 1 # Number of cycles fetch has spent waiting on interrupts, or bad addresses, or out of MSHRs system.cpu.fetch.PendingTrapStallCycles 14 # Number of stall cycles due to pending traps -system.cpu.fetch.CacheLines 14744728 # Number of cache lines fetched -system.cpu.fetch.IcacheSquashes 369535 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.rateDist::samples 66131343 # Number of instructions fetched each cycle (Total) +system.cpu.fetch.CacheLines 14744727 # Number of cache lines fetched +system.cpu.fetch.IcacheSquashes 369536 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.rateDist::samples 66131345 # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::mean 2.004854 # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::stdev 2.741973 # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::underflows 0 0.00% 0.00% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::0 33609066 50.82% 50.82% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::1 6636464 10.04% 60.86% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::0 33609060 50.82% 50.82% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::1 6636469 10.04% 60.86% # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::2 5762437 8.71% 69.57% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::3 4857984 7.35% 76.92% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::4 2814891 4.26% 81.17% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::3 4857985 7.35% 76.92% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::4 2814890 4.26% 81.17% # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::5 1640731 2.48% 83.65% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::6 1559267 2.36% 86.01% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::7 2974436 4.50% 90.51% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::8 6276067 9.49% 100.00% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::6 1559273 2.36% 86.01% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::7 2974432 4.50% 90.51% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::8 6276068 9.49% 100.00% # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::overflows 0 0.00% 100.00% # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::min_value 0 # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::max_value 8 # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::total 66131343 # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::total 66131345 # Number of instructions fetched each cycle (Total) system.cpu.fetch.branchRate 0.415710 # Number of branch fetches per cycle system.cpu.fetch.rate 1.985007 # Number of inst fetches per cycle -system.cpu.decode.IdleCycles 17946396 # Number of cycles decode is idle -system.cpu.decode.BlockedCycles 12652276 # Number of cycles decode is blocked -system.cpu.decode.RunCycles 30529024 # Number of cycles decode is running +system.cpu.decode.IdleCycles 17946387 # Number of cycles decode is idle +system.cpu.decode.BlockedCycles 12652277 # Number of cycles decode is blocked +system.cpu.decode.RunCycles 30529032 # Number of cycles decode is running system.cpu.decode.UnblockCycles 996648 # Number of cycles decode is unblocking -system.cpu.decode.SquashCycles 4006999 # Number of cycles decode is squashing +system.cpu.decode.SquashCycles 4007001 # Number of cycles decode is squashing system.cpu.decode.BranchResolved 4433202 # Number of times decode resolved a branch system.cpu.decode.BranchMispred 29411 # Number of times decode detected a branch misprediction -system.cpu.decode.DecodedInsts 129091755 # Number of instructions handled by decode +system.cpu.decode.DecodedInsts 129091783 # Number of instructions handled by decode system.cpu.decode.SquashedInsts 32642 # Number of squashed instructions handled by decode -system.cpu.rename.SquashCycles 4006999 # Number of cycles rename is squashing -system.cpu.rename.IdleCycles 19654600 # Number of cycles rename is idle -system.cpu.rename.BlockCycles 1107804 # Number of cycles rename is blocking +system.cpu.rename.SquashCycles 4007001 # Number of cycles rename is squashing +system.cpu.rename.IdleCycles 19654593 # Number of cycles rename is idle +system.cpu.rename.BlockCycles 1107803 # Number of cycles rename is blocking system.cpu.rename.serializeStallCycles 8424491 # count of cycles rename stalled for serializing inst -system.cpu.rename.RunCycles 29777332 # Number of cycles rename is running -system.cpu.rename.UnblockCycles 3160117 # Number of cycles rename is unblocking -system.cpu.rename.RenamedInsts 124853414 # Number of instructions processed by rename +system.cpu.rename.RunCycles 29777338 # Number of cycles rename is running +system.cpu.rename.UnblockCycles 3160119 # Number of cycles rename is unblocking +system.cpu.rename.RenamedInsts 124853428 # Number of instructions processed by rename system.cpu.rename.ROBFullEvents 19 # Number of times rename has blocked due to ROB full system.cpu.rename.IQFullEvents 254616 # Number of times rename has blocked due to IQ full -system.cpu.rename.LSQFullEvents 1879605 # Number of times rename has blocked due to LSQ full +system.cpu.rename.LSQFullEvents 1879607 # Number of times rename has blocked due to LSQ full system.cpu.rename.FullRegisterEvents 6 # Number of times there has been no free registers -system.cpu.rename.RenamedOperands 145685583 # Number of destination operands rename has renamed -system.cpu.rename.RenameLookups 543523067 # Number of register rename lookups that rename has made -system.cpu.rename.int_rename_lookups 543516086 # Number of integer rename lookups +system.cpu.rename.RenamedOperands 145685596 # Number of destination operands rename has renamed +system.cpu.rename.RenameLookups 543523130 # Number of register rename lookups that rename has made +system.cpu.rename.int_rename_lookups 543516149 # Number of integer rename lookups system.cpu.rename.fp_rename_lookups 6981 # Number of floating rename lookups system.cpu.rename.CommittedMaps 107429439 # Number of HB maps that are committed -system.cpu.rename.UndoneMaps 38256144 # Number of HB maps that are undone due to squashing -system.cpu.rename.serializingInsts 662187 # count of serializing insts renamed -system.cpu.rename.tempSerializingInsts 664355 # count of temporary serializing insts renamed -system.cpu.rename.skidInsts 7619533 # count of insts added to the skid buffer -system.cpu.memDep0.insertedLoads 29336350 # Number of loads inserted to the mem dependence unit. +system.cpu.rename.UndoneMaps 38256157 # Number of HB maps that are undone due to squashing +system.cpu.rename.serializingInsts 662188 # count of serializing insts renamed +system.cpu.rename.tempSerializingInsts 664356 # count of temporary serializing insts renamed +system.cpu.rename.skidInsts 7619540 # count of insts added to the skid buffer +system.cpu.memDep0.insertedLoads 29336358 # Number of loads inserted to the mem dependence unit. system.cpu.memDep0.insertedStores 5741000 # Number of stores inserted to the mem dependence unit. system.cpu.memDep0.conflictingLoads 1194254 # Number of conflicting loads. system.cpu.memDep0.conflictingStores 692979 # Number of conflicting stores. -system.cpu.iq.iqInstsAdded 117270516 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsAdded 117270526 # Number of instructions added to the IQ (excludes non-spec) system.cpu.iq.iqNonSpecInstsAdded 648807 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqInstsIssued 106162042 # Number of instructions issued +system.cpu.iq.iqInstsIssued 106162051 # Number of instructions issued system.cpu.iq.iqSquashedInstsIssued 30561 # Number of squashed instructions issued -system.cpu.iq.iqSquashedInstsExamined 26211084 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedOperandsExamined 62748223 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.iq.iqSquashedInstsExamined 26211100 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedOperandsExamined 62748267 # Number of squashed operands that are examined and possibly removed from graph system.cpu.iq.iqSquashedNonSpecRemoved 93963 # Number of squashed non-spec instructions that were removed -system.cpu.iq.issued_per_cycle::samples 66131343 # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::samples 66131345 # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::mean 1.605321 # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::stdev 1.761707 # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::underflows 0 0.00% 0.00% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::0 24322507 36.78% 36.78% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::1 14238727 21.53% 58.31% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::2 9857796 14.91% 73.22% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::3 8080873 12.22% 85.44% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::4 4216462 6.38% 91.81% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::5 2267133 3.43% 95.24% # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::6 2478028 3.75% 98.99% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::0 24322505 36.78% 36.78% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::1 14238731 21.53% 58.31% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::2 9857797 14.91% 73.22% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::3 8080871 12.22% 85.44% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::4 4216459 6.38% 91.81% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::5 2267136 3.43% 95.24% # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::6 2478029 3.75% 98.99% # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::7 463113 0.70% 99.69% # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::8 206704 0.31% 100.00% # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::overflows 0 0.00% 100.00% # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::min_value 0 # Number of insts issued each cycle system.cpu.iq.issued_per_cycle::max_value 8 # Number of insts issued each cycle -system.cpu.iq.issued_per_cycle::total 66131343 # Number of insts issued each cycle +system.cpu.iq.issued_per_cycle::total 66131345 # Number of insts issued each cycle system.cpu.iq.fu_full::No_OpClass 0 0.00% 0.00% # attempts to use FU when none available -system.cpu.iq.fu_full::IntAlu 52363 10.31% 10.31% # attempts to use FU when none available +system.cpu.iq.fu_full::IntAlu 52363 10.30% 10.30% # attempts to use FU when none available system.cpu.iq.fu_full::IntMult 27 0.01% 10.31% # attempts to use FU when none available system.cpu.iq.fu_full::IntDiv 0 0.00% 10.31% # attempts to use FU when none available system.cpu.iq.fu_full::FloatAdd 0 0.00% 10.31% # attempts to use FU when none available @@ -190,12 +190,12 @@ system.cpu.iq.fu_full::SimdFloatMisc 0 0.00% 10.31% # at system.cpu.iq.fu_full::SimdFloatMult 0 0.00% 10.31% # attempts to use FU when none available system.cpu.iq.fu_full::SimdFloatMultAcc 0 0.00% 10.31% # attempts to use FU when none available system.cpu.iq.fu_full::SimdFloatSqrt 0 0.00% 10.31% # attempts to use FU when none available -system.cpu.iq.fu_full::MemRead 192834 37.95% 48.26% # attempts to use FU when none available +system.cpu.iq.fu_full::MemRead 192835 37.95% 48.26% # attempts to use FU when none available system.cpu.iq.fu_full::MemWrite 262907 51.74% 100.00% # attempts to use FU when none available system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available system.cpu.iq.FU_type_0::No_OpClass 0 0.00% 0.00% # Type of FU issued -system.cpu.iq.FU_type_0::IntAlu 74696384 70.36% 70.36% # Type of FU issued +system.cpu.iq.FU_type_0::IntAlu 74696385 70.36% 70.36% # Type of FU issued system.cpu.iq.FU_type_0::IntMult 11141 0.01% 70.37% # Type of FU issued system.cpu.iq.FU_type_0::IntDiv 0 0.00% 70.37% # Type of FU issued system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 70.37% # Type of FU issued @@ -224,26 +224,26 @@ system.cpu.iq.FU_type_0::SimdFloatMisc 260 0.00% 70.37% # Ty system.cpu.iq.FU_type_0::SimdFloatMult 0 0.00% 70.37% # Type of FU issued system.cpu.iq.FU_type_0::SimdFloatMultAcc 2 0.00% 70.37% # Type of FU issued system.cpu.iq.FU_type_0::SimdFloatSqrt 0 0.00% 70.37% # Type of FU issued -system.cpu.iq.FU_type_0::MemRead 26155378 24.64% 95.01% # Type of FU issued +system.cpu.iq.FU_type_0::MemRead 26155386 24.64% 95.01% # Type of FU issued system.cpu.iq.FU_type_0::MemWrite 5298717 4.99% 100.00% # Type of FU issued system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued -system.cpu.iq.FU_type_0::total 106162042 # Type of FU issued +system.cpu.iq.FU_type_0::total 106162051 # Type of FU issued system.cpu.iq.rate 1.604598 # Inst issue rate -system.cpu.iq.fu_busy_cnt 508131 # FU busy when requested +system.cpu.iq.fu_busy_cnt 508132 # FU busy when requested system.cpu.iq.fu_busy_rate 0.004786 # FU busy rate (busy events/executed inst) -system.cpu.iq.int_inst_queue_reads 278993219 # Number of integer instruction queue reads -system.cpu.iq.int_inst_queue_writes 144129610 # Number of integer instruction queue writes -system.cpu.iq.int_inst_queue_wakeup_accesses 102521129 # Number of integer instruction queue wakeup accesses +system.cpu.iq.int_inst_queue_reads 278993240 # Number of integer instruction queue reads +system.cpu.iq.int_inst_queue_writes 144129636 # Number of integer instruction queue writes +system.cpu.iq.int_inst_queue_wakeup_accesses 102521130 # Number of integer instruction queue wakeup accesses system.cpu.iq.fp_inst_queue_reads 900 # Number of floating instruction queue reads system.cpu.iq.fp_inst_queue_writes 1354 # Number of floating instruction queue writes system.cpu.iq.fp_inst_queue_wakeup_accesses 412 # Number of floating instruction queue wakeup accesses -system.cpu.iq.int_alu_accesses 106669721 # Number of integer alu accesses +system.cpu.iq.int_alu_accesses 106669731 # Number of integer alu accesses system.cpu.iq.fp_alu_accesses 452 # Number of floating point alu accesses -system.cpu.iew.lsq.thread0.forwLoads 366276 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread0.forwLoads 366279 # Number of loads that had data forwarded from stores system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address -system.cpu.iew.lsq.thread0.squashedLoads 6760478 # Number of loads squashed -system.cpu.iew.lsq.thread0.ignoredResponses 42465 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread0.squashedLoads 6760486 # Number of loads squashed +system.cpu.iew.lsq.thread0.ignoredResponses 42468 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread0.memOrderViolation 731 # Number of memory ordering violations system.cpu.iew.lsq.thread0.squashedStores 994251 # Number of stores squashed system.cpu.iew.lsq.thread0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address @@ -251,12 +251,12 @@ system.cpu.iew.lsq.thread0.blockedLoads 0 # Nu system.cpu.iew.lsq.thread0.rescheduledLoads 2 # Number of loads that were rescheduled system.cpu.iew.lsq.thread0.cacheBlocked 30282 # Number of times an access to memory failed due to the cache being blocked system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle -system.cpu.iew.iewSquashCycles 4006999 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 4007001 # Number of cycles IEW is squashing system.cpu.iew.iewBlockCycles 182542 # Number of cycles IEW is blocking system.cpu.iew.iewUnblockCycles 28701 # Number of cycles IEW is unblocking -system.cpu.iew.iewDispatchedInsts 117958129 # Number of instructions dispatched to IQ +system.cpu.iew.iewDispatchedInsts 117958139 # Number of instructions dispatched to IQ system.cpu.iew.iewDispSquashedInsts 810273 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispLoadInsts 29336350 # Number of dispatched load instructions +system.cpu.iew.iewDispLoadInsts 29336358 # Number of dispatched load instructions system.cpu.iew.iewDispStoreInsts 5741000 # Number of dispatched store instructions system.cpu.iew.iewDispNonSpecInsts 643936 # Number of dispatched non-speculative instructions system.cpu.iew.iewIQFullEvents 9429 # Number of times the IQ has become full, causing a stall @@ -265,17 +265,17 @@ system.cpu.iew.memOrderViolationEvents 731 # Nu system.cpu.iew.predictedTakenIncorrect 1288873 # Number of branches that were predicted taken incorrectly system.cpu.iew.predictedNotTakenIncorrect 210071 # Number of branches that were predicted not taken incorrectly system.cpu.iew.branchMispredicts 1498944 # Number of branch mispredicts detected at execute -system.cpu.iew.iewExecutedInsts 104530426 # Number of executed instructions +system.cpu.iew.iewExecutedInsts 104530427 # Number of executed instructions system.cpu.iew.iewExecLoadInsts 25743276 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 1631616 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecSquashedInsts 1631624 # Number of squashed instructions skipped in execute system.cpu.iew.exec_swp 0 # number of swp insts executed system.cpu.iew.exec_nop 38806 # number of nop insts executed system.cpu.iew.exec_refs 30946109 # number of memory reference insts executed system.cpu.iew.exec_branches 21214083 # Number of branches executed system.cpu.iew.exec_stores 5202833 # Number of stores executed system.cpu.iew.exec_rate 1.579937 # Inst execution rate -system.cpu.iew.wb_sent 102941811 # cumulative count of insts sent to commit -system.cpu.iew.wb_count 102521541 # cumulative count of insts written-back +system.cpu.iew.wb_sent 102941812 # cumulative count of insts sent to commit +system.cpu.iew.wb_count 102521542 # cumulative count of insts written-back system.cpu.iew.wb_producers 60312663 # num instructions producing a value system.cpu.iew.wb_consumers 96996327 # num instructions consuming a value system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ @@ -283,7 +283,7 @@ system.cpu.iew.wb_rate 1.549573 # in system.cpu.iew.wb_fanout 0.621804 # average fanout of values written-back system.cpu.iew.wb_penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.commit.commitCommittedInsts 91262494 # The number of committed instructions -system.cpu.commit.commitSquashedInsts 26696986 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 26696996 # The number of squashed insts skipped by commit system.cpu.commit.commitNonSpecStalls 554844 # The number of times commit has been forced to stall to communicate backwards system.cpu.commit.branchMispredicts 1392644 # The number of times a branch was mispredicted system.cpu.commit.committed_per_cycle::samples 62124345 # Number of insts commited each cycle @@ -314,42 +314,42 @@ system.cpu.commit.int_insts 72533302 # Nu system.cpu.commit.function_calls 56148 # Number of function calls committed. system.cpu.commit.bw_lim_events 4531141 # number cycles where commit BW limit reached system.cpu.commit.bw_limited 0 # number of insts not committed due to BW limits -system.cpu.rob.rob_reads 175546950 # The number of ROB reads -system.cpu.rob.rob_writes 239939834 # The number of ROB writes +system.cpu.rob.rob_reads 175546960 # The number of ROB reads +system.cpu.rob.rob_writes 239939856 # The number of ROB writes system.cpu.timesIdled 1543 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.idleCycles 29796 # Total number of cycles that the CPU has spent unscheduled due to idling system.cpu.committedInsts 91249885 # Number of Instructions Simulated system.cpu.committedInsts_total 91249885 # Number of Instructions Simulated -system.cpu.cpi 0.725054 # CPI: Cycles Per Instruction -system.cpu.cpi_total 0.725054 # CPI: Total CPI of All Threads +system.cpu.cpi 0.725055 # CPI: Cycles Per Instruction +system.cpu.cpi_total 0.725055 # CPI: Total CPI of All Threads system.cpu.ipc 1.379207 # IPC: Instructions Per Cycle system.cpu.ipc_total 1.379207 # IPC: Total IPC of All Threads -system.cpu.int_regfile_reads 496902731 # number of integer regfile reads -system.cpu.int_regfile_writes 120936097 # number of integer regfile writes +system.cpu.int_regfile_reads 496902735 # number of integer regfile reads +system.cpu.int_regfile_writes 120936098 # number of integer regfile writes system.cpu.fp_regfile_reads 197 # number of floating regfile reads system.cpu.fp_regfile_writes 534 # number of floating regfile writes -system.cpu.misc_regfile_reads 184886725 # number of misc regfile reads +system.cpu.misc_regfile_reads 184886717 # number of misc regfile reads system.cpu.misc_regfile_writes 11594 # number of misc regfile writes system.cpu.icache.replacements 2 # number of replacements -system.cpu.icache.tagsinuse 611.587678 # Cycle average of tags in use -system.cpu.icache.total_refs 14743812 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 611.587679 # Cycle average of tags in use +system.cpu.icache.total_refs 14743811 # Total number of references to valid blocks. system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks. -system.cpu.icache.avg_refs 20420.792244 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 20420.790859 # Average number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu.icache.occ_blocks::0 611.587678 # Average occupied blocks per context +system.cpu.icache.occ_blocks::0 611.587679 # Average occupied blocks per context system.cpu.icache.occ_percent::0 0.298627 # Average percentage of cache occupancy -system.cpu.icache.ReadReq_hits 14743812 # number of ReadReq hits -system.cpu.icache.demand_hits 14743812 # number of demand (read+write) hits -system.cpu.icache.overall_hits 14743812 # number of overall hits +system.cpu.icache.ReadReq_hits 14743811 # number of ReadReq hits +system.cpu.icache.demand_hits 14743811 # number of demand (read+write) hits +system.cpu.icache.overall_hits 14743811 # number of overall hits system.cpu.icache.ReadReq_misses 916 # number of ReadReq misses system.cpu.icache.demand_misses 916 # number of demand (read+write) misses system.cpu.icache.overall_misses 916 # number of overall misses system.cpu.icache.ReadReq_miss_latency 32376000 # number of ReadReq miss cycles system.cpu.icache.demand_miss_latency 32376000 # number of demand (read+write) miss cycles system.cpu.icache.overall_miss_latency 32376000 # number of overall miss cycles -system.cpu.icache.ReadReq_accesses 14744728 # number of ReadReq accesses(hits+misses) -system.cpu.icache.demand_accesses 14744728 # number of demand (read+write) accesses -system.cpu.icache.overall_accesses 14744728 # number of overall (read+write) accesses +system.cpu.icache.ReadReq_accesses 14744727 # number of ReadReq accesses(hits+misses) +system.cpu.icache.demand_accesses 14744727 # number of demand (read+write) accesses +system.cpu.icache.overall_accesses 14744727 # number of overall (read+write) accesses system.cpu.icache.ReadReq_miss_rate 0.000062 # miss rate for ReadReq accesses system.cpu.icache.demand_miss_rate 0.000062 # miss rate for demand accesses system.cpu.icache.overall_miss_rate 0.000062 # miss rate for overall accesses @@ -387,45 +387,45 @@ system.cpu.icache.mshr_cap_events 0 # nu system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.replacements 943456 # number of replacements -system.cpu.dcache.tagsinuse 3558.808717 # Cycle average of tags in use -system.cpu.dcache.total_refs 28819274 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 3558.808733 # Cycle average of tags in use +system.cpu.dcache.total_refs 28819271 # Total number of references to valid blocks. system.cpu.dcache.sampled_refs 947552 # Sample count of references to valid blocks. -system.cpu.dcache.avg_refs 30.414451 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 30.414448 # Average number of references to valid blocks. system.cpu.dcache.warmup_cycle 12353041000 # Cycle when the warmup percentage was hit. -system.cpu.dcache.occ_blocks::0 3558.808717 # Average occupied blocks per context +system.cpu.dcache.occ_blocks::0 3558.808733 # Average occupied blocks per context system.cpu.dcache.occ_percent::0 0.868850 # Average percentage of cache occupancy -system.cpu.dcache.ReadReq_hits 24247443 # number of ReadReq hits +system.cpu.dcache.ReadReq_hits 24247440 # number of ReadReq hits system.cpu.dcache.WriteReq_hits 4559242 # number of WriteReq hits system.cpu.dcache.LoadLockedReq_hits 6797 # number of LoadLockedReq hits system.cpu.dcache.StoreCondReq_hits 5792 # number of StoreCondReq hits -system.cpu.dcache.demand_hits 28806685 # number of demand (read+write) hits -system.cpu.dcache.overall_hits 28806685 # number of overall hits +system.cpu.dcache.demand_hits 28806682 # number of demand (read+write) hits +system.cpu.dcache.overall_hits 28806682 # number of overall hits system.cpu.dcache.ReadReq_misses 989267 # number of ReadReq misses system.cpu.dcache.WriteReq_misses 175739 # number of WriteReq misses system.cpu.dcache.LoadLockedReq_misses 7 # number of LoadLockedReq misses system.cpu.dcache.demand_misses 1165006 # number of demand (read+write) misses system.cpu.dcache.overall_misses 1165006 # number of overall misses -system.cpu.dcache.ReadReq_miss_latency 5475542500 # number of ReadReq miss cycles -system.cpu.dcache.WriteReq_miss_latency 4498706928 # number of WriteReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 5475545000 # number of ReadReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 4498707428 # number of WriteReq miss cycles system.cpu.dcache.LoadLockedReq_miss_latency 124500 # number of LoadLockedReq miss cycles -system.cpu.dcache.demand_miss_latency 9974249428 # number of demand (read+write) miss cycles -system.cpu.dcache.overall_miss_latency 9974249428 # number of overall miss cycles -system.cpu.dcache.ReadReq_accesses 25236710 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.demand_miss_latency 9974252428 # number of demand (read+write) miss cycles +system.cpu.dcache.overall_miss_latency 9974252428 # number of overall miss cycles +system.cpu.dcache.ReadReq_accesses 25236707 # number of ReadReq accesses(hits+misses) system.cpu.dcache.WriteReq_accesses 4734981 # number of WriteReq accesses(hits+misses) system.cpu.dcache.LoadLockedReq_accesses 6804 # number of LoadLockedReq accesses(hits+misses) system.cpu.dcache.StoreCondReq_accesses 5792 # number of StoreCondReq accesses(hits+misses) -system.cpu.dcache.demand_accesses 29971691 # number of demand (read+write) accesses -system.cpu.dcache.overall_accesses 29971691 # number of overall (read+write) accesses +system.cpu.dcache.demand_accesses 29971688 # number of demand (read+write) accesses +system.cpu.dcache.overall_accesses 29971688 # number of overall (read+write) accesses system.cpu.dcache.ReadReq_miss_rate 0.039200 # miss rate for ReadReq accesses system.cpu.dcache.WriteReq_miss_rate 0.037115 # miss rate for WriteReq accesses system.cpu.dcache.LoadLockedReq_miss_rate 0.001029 # miss rate for LoadLockedReq accesses system.cpu.dcache.demand_miss_rate 0.038870 # miss rate for demand accesses system.cpu.dcache.overall_miss_rate 0.038870 # miss rate for overall accesses -system.cpu.dcache.ReadReq_avg_miss_latency 5534.949109 # average ReadReq miss latency -system.cpu.dcache.WriteReq_avg_miss_latency 25598.796670 # average WriteReq miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 5534.951636 # average ReadReq miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 25598.799515 # average WriteReq miss latency system.cpu.dcache.LoadLockedReq_avg_miss_latency 17785.714286 # average LoadLockedReq miss latency -system.cpu.dcache.demand_avg_miss_latency 8561.543398 # average overall miss latency -system.cpu.dcache.overall_avg_miss_latency 8561.543398 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency 8561.545973 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency 8561.545973 # average overall miss latency system.cpu.dcache.blocked_cycles::no_mshrs 23239503 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_mshrs 8123 # number of cycles access was blocked @@ -445,31 +445,31 @@ system.cpu.dcache.WriteReq_mshr_misses 44526 # nu system.cpu.dcache.demand_mshr_misses 947553 # number of demand (read+write) MSHR misses system.cpu.dcache.overall_mshr_misses 947553 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses -system.cpu.dcache.ReadReq_mshr_miss_latency 2253075000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_latency 1081062556 # number of WriteReq MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_latency 3334137556 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_latency 3334137556 # number of overall MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 2253076500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 1081063056 # number of WriteReq MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 3334139556 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 3334139556 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.035782 # mshr miss rate for ReadReq accesses system.cpu.dcache.WriteReq_mshr_miss_rate 0.009404 # mshr miss rate for WriteReq accesses system.cpu.dcache.demand_mshr_miss_rate 0.031615 # mshr miss rate for demand accesses system.cpu.dcache.overall_mshr_miss_rate 0.031615 # mshr miss rate for overall accesses -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2495.025066 # average ReadReq mshr miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 24279.354894 # average WriteReq mshr miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 3518.681864 # average overall mshr miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 3518.681864 # average overall mshr miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2495.026727 # average ReadReq mshr miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 24279.366123 # average WriteReq mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 3518.683974 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 3518.683974 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.replacements 744 # number of replacements -system.cpu.l2cache.tagsinuse 9229.669539 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 9229.669691 # Cycle average of tags in use system.cpu.l2cache.total_refs 1596774 # Total number of references to valid blocks. system.cpu.l2cache.sampled_refs 15569 # Sample count of references to valid blocks. system.cpu.l2cache.avg_refs 102.561115 # Average number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu.l2cache.occ_blocks::0 392.792284 # Average occupied blocks per context -system.cpu.l2cache.occ_blocks::1 8836.877255 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::0 392.792276 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::1 8836.877415 # Average occupied blocks per context system.cpu.l2cache.occ_percent::0 0.011987 # Average percentage of cache occupancy system.cpu.l2cache.occ_percent::1 0.269680 # Average percentage of cache occupancy system.cpu.l2cache.ReadReq_hits 901413 # number of ReadReq hits -- cgit v1.2.3