diff options
75 files changed, 1478 insertions, 998 deletions
diff --git a/SConstruct b/SConstruct index dac317fe8..6ca3d6a14 100644 --- a/SConstruct +++ b/SConstruct @@ -324,11 +324,11 @@ Usage: scons [scons options] [build options] [target(s)] Global sticky options: ''' -help_text += global_sticky_vars.GenerateHelpText(main) - # Update main environment with values from ARGUMENTS & global_sticky_vars_file global_sticky_vars.Update(main) +help_text += global_sticky_vars.GenerateHelpText(main) + # Save sticky variable settings back to current variables file global_sticky_vars.Save(global_sticky_vars_file, main) diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index 7ab7319cd..cf4c9b6f5 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -216,6 +216,8 @@ def makeX86System(mem_mode, numCPUs = 1, mdesc = None, self = None): mdesc.diskname = 'x86root.img' self.readfile = mdesc.script() + self.mem_mode = mem_mode + # Physical memory self.membus = MemBus(bus_id=1) self.physmem = PhysicalMemory(range = AddrRange(mdesc.mem())) diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index 52e124ad5..fe70e4d16 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -338,6 +338,31 @@ decode OPCODE default Unknown::unknown() { 0x1c: decode INTFUNC { 0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); } 0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); } + + 0x30: ctpop({{ + uint64_t count = 0; + for (int i = 0; Rb<63:i>; ++i) { + if (Rb<i:i> == 0x1) + ++count; + } + Rc = count; + }}, IntAluOp); + + 0x31: perr({{ + uint64_t temp = 0; + int hi = 7; + int lo = 0; + for (int i = 0; i < 8; ++i) { + uint8_t ra_ub = Ra.uq<hi:lo>; + uint8_t rb_ub = Rb.uq<hi:lo>; + temp += (ra_ub >= rb_ub) ? 
+ (ra_ub - rb_ub) : (rb_ub - ra_ub); + hi += 8; + lo += 8; + } + Rc = temp; + }}); + 0x32: ctlz({{ uint64_t count = 0; uint64_t temp = Rb; @@ -359,26 +384,163 @@ decode OPCODE default Unknown::unknown() { if (!(temp<7:0>)) { temp >>= 8; count += 8; } if (!(temp<3:0>)) { temp >>= 4; count += 4; } if (!(temp<1:0>)) { temp >>= 2; count += 2; } + if (!(temp<0:0> & ULL(0x1))) { + temp >>= 1; count += 1; + } if (!(temp<0:0> & ULL(0x1))) count += 1; Rc = count; }}, IntAluOp); - format FailUnimpl { - 0x30: ctpop(); - 0x31: perr(); - 0x34: unpkbw(); - 0x35: unpkbl(); - 0x36: pkwb(); - 0x37: pklb(); - 0x38: minsb8(); - 0x39: minsw4(); - 0x3a: minub8(); - 0x3b: minuw4(); - 0x3c: maxub8(); - 0x3d: maxuw4(); - 0x3e: maxsb8(); - 0x3f: maxsw4(); - } + + 0x34: unpkbw({{ + Rc = (Rb.uq<7:0> + | (Rb.uq<15:8> << 16) + | (Rb.uq<23:16> << 32) + | (Rb.uq<31:24> << 48)); + }}, IntAluOp); + + 0x35: unpkbl({{ + Rc = (Rb.uq<7:0> | (Rb.uq<15:8> << 32)); + }}, IntAluOp); + + 0x36: pkwb({{ + Rc = (Rb.uq<7:0> + | (Rb.uq<23:16> << 8) + | (Rb.uq<39:32> << 16) + | (Rb.uq<55:48> << 24)); + }}, IntAluOp); + + 0x37: pklb({{ + Rc = (Rb.uq<7:0> | (Rb.uq<39:32> << 8)); + }}, IntAluOp); + + 0x38: minsb8({{ + uint64_t temp = 0; + int hi = 63; + int lo = 56; + for (int i = 7; i >= 0; --i) { + int8_t ra_sb = Ra.uq<hi:lo>; + int8_t rb_sb = Rb.uq<hi:lo>; + temp = ((temp << 8) + | ((ra_sb < rb_sb) ? Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 8; + lo -= 8; + } + Rc = temp; + }}); + + 0x39: minsw4({{ + uint64_t temp = 0; + int hi = 63; + int lo = 48; + for (int i = 3; i >= 0; --i) { + int16_t ra_sw = Ra.uq<hi:lo>; + int16_t rb_sw = Rb.uq<hi:lo>; + temp = ((temp << 16) + | ((ra_sw < rb_sw) ? Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 16; + lo -= 16; + } + Rc = temp; + }}); + + 0x3a: minub8({{ + uint64_t temp = 0; + int hi = 63; + int lo = 56; + for (int i = 7; i >= 0; --i) { + uint8_t ra_ub = Ra.uq<hi:lo>; + uint8_t rb_ub = Rb.uq<hi:lo>; + temp = ((temp << 8) + | ((ra_ub < rb_ub) ? 
Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 8; + lo -= 8; + } + Rc = temp; + }}); + + 0x3b: minuw4({{ + uint64_t temp = 0; + int hi = 63; + int lo = 48; + for (int i = 3; i >= 0; --i) { + uint16_t ra_sw = Ra.uq<hi:lo>; + uint16_t rb_sw = Rb.uq<hi:lo>; + temp = ((temp << 16) + | ((ra_sw < rb_sw) ? Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 16; + lo -= 16; + } + Rc = temp; + }}); + + 0x3c: maxub8({{ + uint64_t temp = 0; + int hi = 63; + int lo = 56; + for (int i = 7; i >= 0; --i) { + uint8_t ra_ub = Ra.uq<hi:lo>; + uint8_t rb_ub = Rb.uq<hi:lo>; + temp = ((temp << 8) + | ((ra_ub > rb_ub) ? Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 8; + lo -= 8; + } + Rc = temp; + }}); + + 0x3d: maxuw4({{ + uint64_t temp = 0; + int hi = 63; + int lo = 48; + for (int i = 3; i >= 0; --i) { + uint16_t ra_uw = Ra.uq<hi:lo>; + uint16_t rb_uw = Rb.uq<hi:lo>; + temp = ((temp << 16) + | ((ra_uw > rb_uw) ? Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 16; + lo -= 16; + } + Rc = temp; + }}); + + 0x3e: maxsb8({{ + uint64_t temp = 0; + int hi = 63; + int lo = 56; + for (int i = 7; i >= 0; --i) { + int8_t ra_sb = Ra.uq<hi:lo>; + int8_t rb_sb = Rb.uq<hi:lo>; + temp = ((temp << 8) + | ((ra_sb > rb_sb) ? Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 8; + lo -= 8; + } + Rc = temp; + }}); + + 0x3f: maxsw4({{ + uint64_t temp = 0; + int hi = 63; + int lo = 48; + for (int i = 3; i >= 0; --i) { + int16_t ra_sw = Ra.uq<hi:lo>; + int16_t rb_sw = Rb.uq<hi:lo>; + temp = ((temp << 16) + | ((ra_sw > rb_sw) ? 
Ra.uq<hi:lo> + : Rb.uq<hi:lo>)); + hi -= 16; + lo -= 16; + } + Rc = temp; + }}); format BasicOperateWithNopCheck { 0x70: decode RB { diff --git a/src/arch/alpha/process.cc b/src/arch/alpha/process.cc index 9d75d5fa1..1c83f64b2 100644 --- a/src/arch/alpha/process.cc +++ b/src/arch/alpha/process.cc @@ -175,21 +175,22 @@ AlphaLiveProcess::argsInit(int intSize, int pageSize) void AlphaLiveProcess::startup() { - if (checkpointRestored) + ThreadContext *tc = system->getThreadContext(contextIds[0]); + tc->setMiscRegNoEffect(IPR_DTB_ASN, M5_pid << 57); + + if (checkpointRestored) { return; + } Process::startup(); argsInit(MachineBytes, VMPageSize); - ThreadContext *tc = system->getThreadContext(contextIds[0]); tc->setIntReg(GlobalPointerReg, objFile->globalPointer()); //Operate in user mode tc->setMiscRegNoEffect(IPR_ICM, 0x18); //No super page mapping tc->setMiscRegNoEffect(IPR_MCSR, 0); - //Set this to 0 for now, but it should be unique for each process - tc->setMiscRegNoEffect(IPR_DTB_ASN, M5_pid << 57); } AlphaISA::IntReg diff --git a/src/arch/mips/isa.cc b/src/arch/mips/isa.cc index 3c8c9a986..1cad7e4be 100644 --- a/src/arch/mips/isa.cc +++ b/src/arch/mips/isa.cc @@ -91,12 +91,6 @@ ISA::ISA() init(); } -ISA::ISA(BaseCPU *_cpu) -{ - cpu = _cpu; - init(); -} - void ISA::init() { @@ -173,11 +167,10 @@ ISA::expandForMultithreading(ThreadID num_threads, unsigned num_vpes) //@TODO: Use MIPS STYLE CONSTANTS (e.g. 
TCHALT_H instead of TCH_H) void ISA::reset(std::string core_name, ThreadID num_threads, - unsigned num_vpes, BaseCPU *_cpu) + unsigned num_vpes, BaseCPU *cpu) { DPRINTF(MipsPRA, "Resetting CP0 State with %i TCs and %i VPEs\n", num_threads, num_vpes); - cpu = _cpu; MipsISA::CoreSpecific &cp = cpu->coreParams; @@ -499,7 +492,7 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, miscRegFile[misc_reg][reg_sel] = cp0_val; - scheduleCP0Update(1); + scheduleCP0Update(tc->getCpuPtr(), 1); } /** @@ -528,7 +521,7 @@ ISA::filterCP0Write(int misc_reg, int reg_sel, const MiscReg &val) } void -ISA::scheduleCP0Update(int delay) +ISA::scheduleCP0Update(BaseCPU *cpu, int delay) { if (!cp0Updated) { cp0Updated = true; @@ -540,7 +533,7 @@ ISA::scheduleCP0Update(int delay) } void -ISA::updateCPU() +ISA::updateCPU(BaseCPU *cpu) { /////////////////////////////////////////////////////////////////// // @@ -578,7 +571,7 @@ ISA::CP0Event::process() switch (cp0EventType) { case UpdateCP0: - cp0->updateCPU(); + cp0->updateCPU(cpu); break; } } diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh index 165adff83..3f7afcdd0 100644 --- a/src/arch/mips/isa.hh +++ b/src/arch/mips/isa.hh @@ -64,18 +64,15 @@ namespace MipsISA std::vector<std::vector<MiscReg> > miscRegFile_WriteMask; std::vector<BankType> bankType; - BaseCPU *cpu; - public: ISA(); - ISA(BaseCPU *_cpu); void init(); void clear(unsigned tid_or_vpn = 0); void reset(std::string core_name, ThreadID num_threads, - unsigned num_vpes, BaseCPU *_cpu); + unsigned num_vpes, BaseCPU *cpu); void expandForMultithreading(ThreadID num_threads, unsigned num_vpes); @@ -147,11 +144,11 @@ namespace MipsISA }; // Schedule a CP0 Update Event - void scheduleCP0Update(int delay = 0); + void scheduleCP0Update(BaseCPU *cpu, int delay = 0); // If any changes have been made, then check the state for changes // and if necessary alert the CPU - void updateCPU(); + void updateCPU(BaseCPU *cpu); // Keep a List of CPU Events that need to be deallocated 
std::queue<CP0Event*> cp0EventRemoveList; diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index c531347d2..36533e076 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -2476,10 +2476,14 @@ decode OPCODE_HI default Unknown::unknown() { } } } - 0x3: decode OP_HI { - 0x2: decode OP_LO { - 0x3: FailUnimpl::rdhwr(); + 0x3: decode OP { +#if FULL_SYSTEM + 0x0: FailUnimpl::rdhwr(); +#else + 0x0: decode RD { + 29: BasicOp::rdhwr({{ Rt = TpValue; }}); } +#endif } } } diff --git a/src/arch/mips/isa/operands.isa b/src/arch/mips/isa/operands.isa index 50726cd30..27cb4357a 100644 --- a/src/arch/mips/isa/operands.isa +++ b/src/arch/mips/isa/operands.isa @@ -109,8 +109,11 @@ def operands {{ #LL Flag 'LLFlag': ('ControlReg', 'uw', 'MISCREG_LLFLAG', None, 1), + #Thread pointer value for SE mode + 'TpValue': ('ControlReg', 'ud', 'MISCREG_TP_VALUE', None, 1), + # Index Register - 'Index':('ControlReg','uw','MISCREG_INDEX',None,1), + 'Index': ('ControlReg','uw','MISCREG_INDEX',None,1), 'CP0_RD_SEL': ('ControlReg', 'uw', '(RD << 3 | SEL)', None, 1), diff --git a/src/arch/mips/linux/process.cc b/src/arch/mips/linux/process.cc index c2a05b73b..4c3581ecb 100644 --- a/src/arch/mips/linux/process.cc +++ b/src/arch/mips/linux/process.cc @@ -126,6 +126,16 @@ sys_setsysinfoFunc(SyscallDesc *desc, int callnum, LiveProcess *process, return 1; } +static SyscallReturn +setThreadAreaFunc(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + Addr addr = process->getSyscallArg(tc, index); + tc->setMiscRegNoEffect(MISCREG_TP_VALUE, addr); + return 0; +} + SyscallDesc MipsLinuxProcess::syscallDescs[] = { /* 0 */ SyscallDesc("syscall", unimplementedFunc), /* 1 */ SyscallDesc("exit", exitFunc), @@ -409,7 +419,44 @@ SyscallDesc MipsLinuxProcess::syscallDescs[] = { /* 279 */ SyscallDesc("unknown #279", unimplementedFunc), /* 280 */ SyscallDesc("add_key", unimplementedFunc), /* 281 */ 
SyscallDesc("request_key", unimplementedFunc), - /* 282 */ SyscallDesc("keyctl", unimplementedFunc) + /* 282 */ SyscallDesc("keyctl", unimplementedFunc), + /* 283 */ SyscallDesc("set_thread_area", setThreadAreaFunc), + /* 284 */ SyscallDesc("inotify_init", unimplementedFunc), + /* 285 */ SyscallDesc("inotify_add_watch", unimplementedFunc), + /* 286 */ SyscallDesc("inotify_rm_watch", unimplementedFunc), + /* 287 */ SyscallDesc("migrate_pages", unimplementedFunc), + /* 288 */ SyscallDesc("openat", unimplementedFunc), + /* 289 */ SyscallDesc("mkdirat", unimplementedFunc), + /* 290 */ SyscallDesc("mknodat", unimplementedFunc), + /* 291 */ SyscallDesc("fchownat", unimplementedFunc), + /* 292 */ SyscallDesc("futimesat", unimplementedFunc), + /* 293 */ SyscallDesc("fstatat64", unimplementedFunc), + /* 294 */ SyscallDesc("unlinkat", unimplementedFunc), + /* 295 */ SyscallDesc("renameat", unimplementedFunc), + /* 296 */ SyscallDesc("linkat", unimplementedFunc), + /* 297 */ SyscallDesc("symlinkat", unimplementedFunc), + /* 298 */ SyscallDesc("readlinkat", unimplementedFunc), + /* 299 */ SyscallDesc("fchmodat", unimplementedFunc), + /* 300 */ SyscallDesc("faccessat", unimplementedFunc), + /* 301 */ SyscallDesc("pselect6", unimplementedFunc), + /* 302 */ SyscallDesc("ppoll", unimplementedFunc), + /* 303 */ SyscallDesc("unshare", unimplementedFunc), + /* 304 */ SyscallDesc("splice", unimplementedFunc), + /* 305 */ SyscallDesc("sync_file_range", unimplementedFunc), + /* 306 */ SyscallDesc("tee", unimplementedFunc), + /* 307 */ SyscallDesc("vmsplice", unimplementedFunc), + /* 308 */ SyscallDesc("move_pages", unimplementedFunc), + /* 309 */ SyscallDesc("set_robust_list", unimplementedFunc), + /* 310 */ SyscallDesc("get_robust_list", unimplementedFunc), + /* 311 */ SyscallDesc("kexec_load", unimplementedFunc), + /* 312 */ SyscallDesc("getcpu", unimplementedFunc), + /* 313 */ SyscallDesc("epoll_pwait", unimplementedFunc), + /* 314 */ SyscallDesc("ioprio_set", unimplementedFunc), + 
/* 315 */ SyscallDesc("ioprio_get", unimplementedFunc), + /* 316 */ SyscallDesc("utimensat", unimplementedFunc), + /* 317 */ SyscallDesc("signalfd", unimplementedFunc), + /* 318 */ SyscallDesc("timerfd", unimplementedFunc), + /* 319 */ SyscallDesc("eventfd", unimplementedFunc) }; MipsLinuxProcess::MipsLinuxProcess(LiveProcessParams * params, diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index d96b0c81c..2fd9114e9 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -34,6 +34,7 @@ #include "arch/mips/process.hh" #include "base/loader/object_file.hh" +#include "base/loader/elf_object.hh" #include "base/misc.hh" #include "cpu/thread_context.hh" @@ -61,8 +62,8 @@ MipsLiveProcess::MipsLiveProcess(LiveProcessParams * params, brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); brk_point = roundUp(brk_point, VMPageSize); - // Set up region for mmaps. For now, start at bottom of kuseg space. - mmap_start = mmap_end = 0x10000; + // Set up region for mmaps. Start it 1GB above the top of the heap. + mmap_start = mmap_end = brk_point + 0x40000000L; } void @@ -70,18 +71,52 @@ MipsLiveProcess::startup() { Process::startup(); - argsInit(MachineBytes, VMPageSize); + argsInit<uint32_t>(VMPageSize); } +template<class IntType> void -MipsLiveProcess::argsInit(int intSize, int pageSize) +MipsLiveProcess::argsInit(int pageSize) { + int intSize = sizeof(IntType); + Process::startup(); + // load object file into target memory objFile->loadSections(initVirtMem); - // Calculate how much space we need for arg & env arrays. 
+ typedef AuxVector<IntType> auxv_t; + std::vector<auxv_t> auxv; + + ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile); + if (elfObject) + { + // Set the system page size + auxv.push_back(auxv_t(M5_AT_PAGESZ, MipsISA::VMPageSize)); + // Set the frequency at which time() increments + auxv.push_back(auxv_t(M5_AT_CLKTCK, 100)); + // For statically linked executables, this is the virtual + // address of the program header tables if they appear in the + // executable image. + auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable())); + DPRINTF(Loader, "auxv at PHDR %08p\n", elfObject->programHeaderTable()); + // This is the size of a program header entry from the elf file. + auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize())); + // This is the number of program headers from the original elf file. + auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount())); + //The entry point to the program + auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint())); + //Different user and group IDs + auxv.push_back(auxv_t(M5_AT_UID, uid())); + auxv.push_back(auxv_t(M5_AT_EUID, euid())); + auxv.push_back(auxv_t(M5_AT_GID, gid())); + auxv.push_back(auxv_t(M5_AT_EGID, egid())); + } + + // Calculate how much space we need for arg & env & auxv arrays. 
int argv_array_size = intSize * (argv.size() + 1); int envp_array_size = intSize * (envp.size() + 1); + int auxv_array_size = intSize * 2 * (auxv.size() + 1); + int arg_data_size = 0; for (vector<string>::size_type i = 0; i < argv.size(); ++i) { arg_data_size += argv[i].size() + 1; @@ -92,9 +127,11 @@ MipsLiveProcess::argsInit(int intSize, int pageSize) } int space_needed = - argv_array_size + envp_array_size + arg_data_size + env_data_size; - if (space_needed < 32*1024) - space_needed = 32*1024; + argv_array_size + + envp_array_size + + auxv_array_size + + arg_data_size + + env_data_size; // set bottom of stack stack_min = stack_base - space_needed; @@ -105,27 +142,16 @@ MipsLiveProcess::argsInit(int intSize, int pageSize) pTable->allocate(stack_min, roundUp(stack_size, pageSize)); // map out initial stack contents - // ======== - // NOTE: Using uint32_t hardcodes MIPS32 and not MIPS64 - // even if MIPS64 was intended. This is because the - // copyStringArray function templates on the parameters. - // Elegant way to check intSize and vary between 32/64? 
- // ======== - uint32_t argv_array_base = stack_min + intSize; // room for argc - uint32_t envp_array_base = argv_array_base + argv_array_size; - uint32_t arg_data_base = envp_array_base + envp_array_size; - uint32_t env_data_base = arg_data_base + arg_data_size; + IntType argv_array_base = stack_min + intSize; // room for argc + IntType envp_array_base = argv_array_base + argv_array_size; + IntType auxv_array_base = envp_array_base + envp_array_size; + IntType arg_data_base = auxv_array_base + auxv_array_size; + IntType env_data_base = arg_data_base + arg_data_size; // write contents to stack - uint32_t argc = argv.size(); - - if (intSize == 8) - argc = htog((uint64_t)argc); - else if (intSize == 4) - argc = htog((uint32_t)argc); - else - panic("Unknown int size"); + IntType argc = argv.size(); + argc = htog((IntType)argc); initVirtMem->writeBlob(stack_min, (uint8_t*)&argc, intSize); @@ -133,6 +159,21 @@ MipsLiveProcess::argsInit(int intSize, int pageSize) copyStringArray(envp, envp_array_base, env_data_base, initVirtMem); + // Copy the aux vector + for (typename vector<auxv_t>::size_type x = 0; x < auxv.size(); x++) { + initVirtMem->writeBlob(auxv_array_base + x * 2 * intSize, + (uint8_t*)&(auxv[x].a_type), intSize); + initVirtMem->writeBlob(auxv_array_base + (x * 2 + 1) * intSize, + (uint8_t*)&(auxv[x].a_val), intSize); + } + + // Write out the terminating zeroed auxilliary vector + for (unsigned i = 0; i < 2; i++) { + const IntType zero = 0; + const Addr addr = auxv_array_base + 2 * intSize * (auxv.size() + i); + initVirtMem->writeBlob(addr, (uint8_t*)&zero, intSize); + } + ThreadContext *tc = system->getThreadContext(contextIds[0]); setSyscallArg(tc, 0, argc); diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index f35ec8554..f1238b41f 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -47,7 +47,8 @@ class MipsLiveProcess : public LiveProcess void startup(); - void argsInit(int intSize, int pageSize); + template<class 
IntType> + void argsInit(int pageSize); public: MipsISA::IntReg getSyscallArg(ThreadContext *tc, int &i); diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index fdb04b131..5cf76634d 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -275,6 +275,7 @@ enum MiscRegIndex{ MISCREG_DESAVE = 248, //Bank 31: 248-256 MISCREG_LLFLAG = 257, + MISCREG_TP_VALUE, MISCREG_NUMREGS }; diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh index 508ef4e26..854d4de09 100644 --- a/src/arch/x86/insts/micromediaop.hh +++ b/src/arch/x86/insts/micromediaop.hh @@ -35,6 +35,12 @@ namespace X86ISA { + enum MediaFlag { + MediaMultHiOp = 1, + MediaSignedOp = 64, + MediaScalarOp = 128 + }; + class MediaOpBase : public X86MicroopBase { protected: @@ -59,6 +65,30 @@ namespace X86ISA src1(_src1.idx), dest(_dest.idx), srcSize(_srcSize), destSize(_destSize), ext(_ext) {} + + bool + scalarOp() const + { + return ext & MediaScalarOp; + } + + int + numItems(int size) const + { + return scalarOp() ? 
1 : (sizeof(FloatRegBits) / size); + } + + bool + multHi() const + { + return ext & MediaMultHiOp; + } + + bool + signedOp() const + { + return ext & MediaSignedOp; + } }; class MediaOpReg : public MediaOpBase diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py index 083d8775d..e4c90b8d9 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py @@ -55,33 +55,33 @@ microcode = ''' def macroop ADDSS_XMM_XMM { - maddf xmml, xmml, xmmlm, size=4, ext=1 + maddf xmml, xmml, xmmlm, size=4, ext=Scalar }; def macroop ADDSS_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - maddf xmml, xmml, ufp1, size=4, ext=1 + maddf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop ADDSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - maddf xmml, xmml, ufp1, size=4, ext=1 + maddf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop ADDSD_XMM_XMM { - maddf xmml, xmml, xmmlm, size=8, ext=1 + maddf xmml, xmml, xmmlm, size=8, ext=Scalar }; def macroop ADDSD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - maddf xmml, xmml, ufp1, size=8, ext=1 + maddf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop ADDSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - maddf xmml, xmml, ufp1, size=8, ext=1 + maddf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop ADDPS_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py index 3e565278c..e8f596463 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py @@ -55,33 +55,33 @@ microcode = ''' def macroop DIVSS_XMM_XMM { - mdivf xmml, xmml, xmmlm, size=4, ext=1 + mdivf xmml, xmml, xmmlm, size=4, ext=Scalar }; def macroop DIVSS_XMM_M { ldfp 
ufp1, seg, sib, disp, dataSize=8 - mdivf xmml, xmml, ufp1, size=4, ext=1 + mdivf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop DIVSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mdivf xmml, xmml, ufp1, size=4, ext=1 + mdivf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop DIVSD_XMM_XMM { - mdivf xmml, xmml, xmmlm, size=8, ext=1 + mdivf xmml, xmml, xmmlm, size=8, ext=Scalar }; def macroop DIVSD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mdivf xmml, xmml, ufp1, size=8, ext=1 + mdivf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop DIVSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mdivf xmml, xmml, ufp1, size=8, ext=1 + mdivf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop DIVPS_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py index adf7650b9..41c5f719c 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py @@ -57,23 +57,23 @@ microcode = ''' # HADDPS def macroop HADDPD_XMM_XMM { - maddf ufp1, xmmh , xmml, size=8, ext=1 - maddf xmmh, xmmlm, xmmhm, size=8, ext=1 + maddf ufp1, xmmh , xmml, size=8, ext=Scalar + maddf xmmh, xmmlm, xmmhm, size=8, ext=Scalar movfp xmml, ufp1 }; def macroop HADDPD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT+8", dataSize=8 - maddf xmml, xmmh, xmml, size=8, ext=1 - maddf xmmh, ufp1, ufp2, size=8, ext=1 + maddf xmml, xmmh, xmml, size=8, ext=Scalar + maddf xmmh, ufp1, ufp2, size=8, ext=Scalar }; def macroop HADDPD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8 - maddf xmml, xmmh, xmml, size=8, ext=1 - maddf xmmh, ufp1, ufp2, size=8, ext=1 + maddf xmml, xmmh, xmml, size=8, ext=Scalar + maddf xmmh, ufp1, ufp2, size=8, ext=Scalar }; ''' diff 
--git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py index fc28fbda4..c00aa6048 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py @@ -55,33 +55,33 @@ microcode = ''' def macroop MULSS_XMM_XMM { - mmulf xmml, xmml, xmmlm, size=4, ext=1 + mmulf xmml, xmml, xmmlm, size=4, ext=Scalar }; def macroop MULSS_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmulf xmml, xmml, ufp1, size=4, ext=1 + mmulf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop MULSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmulf xmml, xmml, ufp1, size=4, ext=1 + mmulf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop MULSD_XMM_XMM { - mmulf xmml, xmml, xmmlm, size=8, ext=1 + mmulf xmml, xmml, xmmlm, size=8, ext=Scalar }; def macroop MULSD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmulf xmml, xmml, ufp1, size=8, ext=1 + mmulf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop MULSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmulf xmml, xmml, ufp1, size=8, ext=1 + mmulf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop MULPS_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py index fdeb30ddc..dc52a63c3 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py @@ -55,18 +55,18 @@ microcode = ''' def macroop SQRTSS_XMM_XMM { - msqrt xmml, xmmlm, size=4, ext=1 + msqrt xmml, xmmlm, size=4, ext=Scalar }; def macroop SQRTSS_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - msqrt xmml, ufp1, size=4, ext=1 + msqrt xmml, ufp1, size=4, ext=Scalar }; def macroop SQRTSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - 
msqrt xmml, ufp1, size=4, ext=1 + msqrt xmml, ufp1, size=4, ext=Scalar }; def macroop SQRTPS_XMM_XMM { @@ -90,18 +90,18 @@ def macroop SQRTPS_XMM_P { }; def macroop SQRTSD_XMM_XMM { - msqrt xmml, xmmlm, size=8, ext=1 + msqrt xmml, xmmlm, size=8, ext=Scalar }; def macroop SQRTSD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - msqrt xmml, ufp1, size=8, ext=1 + msqrt xmml, ufp1, size=8, ext=Scalar }; def macroop SQRTSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - msqrt xmml, ufp1, size=8, ext=1 + msqrt xmml, ufp1, size=8, ext=Scalar }; def macroop SQRTPD_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py index 378abc070..d69ce3831 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py @@ -55,33 +55,33 @@ microcode = ''' def macroop SUBSS_XMM_XMM { - msubf xmml, xmml, xmmlm, size=4, ext=1 + msubf xmml, xmml, xmmlm, size=4, ext=Scalar }; def macroop SUBSS_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - msubf xmml, xmml, ufp1, size=4, ext=1 + msubf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop SUBSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - msubf xmml, xmml, ufp1, size=4, ext=1 + msubf xmml, xmml, ufp1, size=4, ext=Scalar }; def macroop SUBSD_XMM_XMM { - msubf xmml, xmml, xmmlm, size=8, ext=1 + msubf xmml, xmml, xmmlm, size=8, ext=Scalar }; def macroop SUBSD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - msubf xmml, xmml, ufp1, size=8, ext=1 + msubf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop SUBSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - msubf xmml, xmml, ufp1, size=8, ext=1 + msubf xmml, xmml, ufp1, size=8, ext=Scalar }; def macroop SUBPS_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py 
b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py index 09c34600b..e4449be10 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py @@ -95,32 +95,32 @@ def macroop CMPPD_XMM_P_I { }; def macroop CMPSS_XMM_XMM_I { - mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE | 0x8" + mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE |" + Scalar }; def macroop CMPSS_XMM_M_I { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8" + mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE |" + Scalar }; def macroop CMPSS_XMM_P_I { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8" + mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE |" + Scalar }; def macroop CMPSD_XMM_XMM_I { - mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE | 0x8" + mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE |" + Scalar }; def macroop CMPSD_XMM_M_I { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8" + mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE |" + Scalar }; def macroop CMPSD_XMM_P_I { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8" + mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE |" + Scalar }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py index 17c97662c..0a62ce343 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py @@ -95,33 +95,33 @@ def macroop MINPD_XMM_P { }; def macroop MINSS_XMM_XMM { - mminf 
xmml, xmml, xmmlm, ext=1, size=4 + mminf xmml, xmml, xmmlm, ext=Scalar, size=4 }; def macroop MINSS_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mminf xmml, xmml, ufp1, ext=1, size=4 + mminf xmml, xmml, ufp1, ext=Scalar, size=4 }; def macroop MINSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mminf xmml, xmml, ufp1, ext=1, size=4 + mminf xmml, xmml, ufp1, ext=Scalar, size=4 }; def macroop MINSD_XMM_XMM { - mminf xmml, xmml, xmmlm, ext=1, size=8 + mminf xmml, xmml, xmmlm, ext=Scalar, size=8 }; def macroop MINSD_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mminf xmml, xmml, ufp1, ext=1, size=8 + mminf xmml, xmml, ufp1, ext=Scalar, size=8 }; def macroop MINSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mminf xmml, xmml, ufp1, ext=1, size=8 + mminf xmml, xmml, ufp1, ext=Scalar, size=8 }; def macroop MAXPS_XMM_XMM { @@ -165,32 +165,32 @@ def macroop MAXPD_XMM_P { }; def macroop MAXSS_XMM_XMM { - mmaxf xmml, xmml, xmmlm, ext=1, size=4 + mmaxf xmml, xmml, xmmlm, ext=Scalar, size=4 }; def macroop MAXSS_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mmaxf xmml, xmml, ufp1, ext=1, size=4 + mmaxf xmml, xmml, ufp1, ext=Scalar, size=4 }; def macroop MAXSS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mmaxf xmml, xmml, ufp1, ext=1, size=4 + mmaxf xmml, xmml, ufp1, ext=Scalar, size=4 }; def macroop MAXSD_XMM_XMM { - mmaxf xmml, xmml, xmmlm, ext=1, size=8 + mmaxf xmml, xmml, xmmlm, ext=Scalar, size=8 }; def macroop MAXSD_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mmaxf xmml, xmml, ufp1, ext=1, size=8 + mmaxf xmml, xmml, ufp1, ext=Scalar, size=8 }; def macroop MAXSD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mmaxf xmml, xmml, ufp1, ext=1, size=8 + mmaxf xmml, xmml, ufp1, ext=Scalar, size=8 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py 
b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py index 1c36f7e45..5988c77ba 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py @@ -55,33 +55,33 @@ microcode = ''' def macroop CVTSS2SD_XMM_XMM { - cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=1 + cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=Scalar }; def macroop CVTSS2SD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1 + cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=Scalar }; def macroop CVTSS2SD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1 + cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=Scalar }; def macroop CVTSD2SS_XMM_XMM { - cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=1 + cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=Scalar }; def macroop CVTSD2SS_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1 + cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=Scalar }; def macroop CVTSD2SS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1 + cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=Scalar }; def macroop CVTPS2PD_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py index 16abd96f4..0b7ca5c5b 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py @@ -55,74 +55,74 @@ microcode = ''' def macroop CVTSS2SI_R_XMM { - cvtf2i ufp1, xmmlm, 
srcSize=4, destSize=dsz, ext=(1 | 4) + cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext = Scalar + "| 4" mov2int reg, ufp1, size=dsz }; def macroop CVTSS2SI_R_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4) + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext = Scalar + "| 4" mov2int reg, ufp1, size=dsz }; def macroop CVTSS2SI_R_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4) + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext = Scalar + "| 4" mov2int reg, ufp1, size=dsz }; def macroop CVTSD2SI_R_XMM { - cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=(1 | 4) + cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext = Scalar + "| 4" mov2int reg, ufp1, size=dsz }; def macroop CVTSD2SI_R_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4) + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext = Scalar + "| 4" mov2int reg, ufp1, size=dsz }; def macroop CVTSD2SI_R_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4) + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext = Scalar + "| 4" mov2int reg, ufp1, size=dsz }; def macroop CVTTSS2SI_R_XMM { - cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=1 + cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=Scalar mov2int reg, ufp1, size=dsz }; def macroop CVTTSS2SI_R_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1 + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=Scalar mov2int reg, ufp1, size=dsz }; def macroop CVTTSS2SI_R_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1 + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=Scalar mov2int reg, ufp1, size=dsz }; def macroop CVTTSD2SI_R_XMM { - cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=1 + cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=Scalar mov2int reg, ufp1, size=dsz }; def 
macroop CVTTSD2SI_R_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1 + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=Scalar mov2int reg, ufp1, size=dsz }; def macroop CVTTSD2SI_R_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1 + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=Scalar mov2int reg, ufp1, size=dsz }; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py index 05e2b80d5..1e9856562 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py @@ -135,43 +135,43 @@ def macroop PADDQ_XMM_P { }; def macroop PADDSB_XMM_XMM { - maddi xmml, xmml, xmmlm, size=1, ext=4 - maddi xmmh, xmmh, xmmhm, size=1, ext=4 + maddi xmml, xmml, xmmlm, size=1, ext = "2 |" + Signed + maddi xmmh, xmmh, xmmhm, size=1, ext = "2 |" + Signed }; def macroop PADDSB_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - maddi xmml, xmml, ufp1, size=1, ext=4 - maddi xmmh, xmmh, ufp2, size=1, ext=4 + maddi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed + maddi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed }; def macroop PADDSB_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - maddi xmml, xmml, ufp1, size=1, ext=4 - maddi xmmh, xmmh, ufp2, size=1, ext=4 + maddi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed + maddi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed }; def macroop PADDSW_XMM_XMM { - maddi xmml, xmml, xmmlm, size=2, ext=4 - maddi xmmh, xmmh, xmmhm, size=2, ext=4 + maddi xmml, xmml, xmmlm, size=2, ext = "2 |" + Signed + maddi xmmh, xmmh, xmmhm, size=2, ext = "2 |" + Signed }; def macroop PADDSW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT 
+ 8", dataSize=8 - maddi xmml, xmml, ufp1, size=2, ext=4 - maddi xmmh, xmmh, ufp2, size=2, ext=4 + maddi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed + maddi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed }; def macroop PADDSW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - maddi xmml, xmml, ufp1, size=2, ext=4 - maddi xmmh, xmmh, ufp2, size=2, ext=4 + maddi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed + maddi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed }; def macroop PADDUSB_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py index a5d90c6b2..904bf69f8 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py @@ -55,82 +55,82 @@ microcode = ''' def macroop PMULHW_XMM_XMM { - mmuli xmml, xmml, xmmlm, size=2, ext=(0x2 | 0x8) - mmuli xmmh, xmmh, xmmhm, size=2, ext=(0x2 | 0x8) + mmuli xmml, xmml, xmmlm, size=2, ext = Signed + "|" + MultHi + mmuli xmmh, xmmh, xmmhm, size=2, ext = Signed + "|" + MultHi }; def macroop PMULHW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8) - mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8) + mmuli xmml, xmml, ufp1, size=2, ext = Signed + "|" + MultHi + mmuli xmmh, xmmh, ufp2, size=2, ext = Signed + "|" + MultHi }; def macroop PMULHW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8) - mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8) + mmuli xmml, xmml, ufp1, size=2, ext = Signed + "|" + MultHi + mmuli xmmh, xmmh, ufp2, size=2, ext = Signed + "|" + MultHi }; def macroop PMULLW_XMM_XMM { - mmuli xmml, xmml, xmmlm, size=2, ext=2 - 
mmuli xmmh, xmmh, xmmhm, size=2, ext=2 + mmuli xmml, xmml, xmmlm, size=2, ext=Signed + mmuli xmmh, xmmh, xmmhm, size=2, ext=Signed }; def macroop PMULLW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, size=2, ext=2 - mmuli xmmh, xmmh, ufp2, size=2, ext=2 + mmuli xmml, xmml, ufp1, size=2, ext=Signed + mmuli xmmh, xmmh, ufp2, size=2, ext=Signed }; def macroop PMULLW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, size=2, ext=2 - mmuli xmmh, xmmh, ufp2, size=2, ext=2 + mmuli xmml, xmml, ufp1, size=2, ext=Signed + mmuli xmmh, xmmh, ufp2, size=2, ext=Signed }; def macroop PMULHUW_XMM_XMM { - mmuli xmml, xmml, xmmlm, size=2, ext=8 - mmuli xmmh, xmmh, xmmhm, size=2, ext=8 + mmuli xmml, xmml, xmmlm, size=2, ext = MultHi + mmuli xmmh, xmmh, xmmhm, size=2, ext = MultHi }; def macroop PMULHUW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, size=2, ext=8 - mmuli xmmh, xmmh, ufp2, size=2, ext=8 + mmuli xmml, xmml, ufp1, size=2, ext = MultHi + mmuli xmmh, xmmh, ufp2, size=2, ext = MultHi }; def macroop PMULHUW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, size=2, ext=8 - mmuli xmmh, xmmh, ufp2, size=2, ext=8 + mmuli xmml, xmml, ufp1, size=2, ext = MultHi + mmuli xmmh, xmmh, ufp2, size=2, ext = MultHi }; def macroop PMULUDQ_XMM_XMM { - mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=1 - mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=1 + mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=Scalar + mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=Scalar }; def macroop PMULUDQ_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, 
ufp1, srcSize=4, destSize=8, ext=1 - mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar }; def macroop PMULUDQ_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1 - mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar }; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py index f157d165f..64ae05190 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py @@ -55,22 +55,22 @@ microcode = ''' def macroop PMADDWD_XMM_XMM { - mmuli ufp3, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, xmml, xmmlm, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, xmml, xmmlm, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi xmml, ufp3, ufp4, size=4, ext=0 - mmuli ufp3, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, xmmh, xmmhm, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, xmmh, xmmhm, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi xmmh, ufp3, ufp4, size=4, ext=0 }; def macroop PMADDWD_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 
0x20" + mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi xmml, ufp3, ufp4, size=4, ext=0 - mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi xmmh, ufp3, ufp4, size=4, ext=0 }; @@ -78,11 +78,11 @@ def macroop PMADDWD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi xmml, ufp3, ufp4, size=4, ext=0 - mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi xmmh, ufp3, ufp4, size=4, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py index fdfb08667..d73434832 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py @@ -135,43 +135,43 @@ def macroop PSUBQ_XMM_P { }; def macroop PSUBSB_XMM_XMM { - msubi xmml, xmml, xmmlm, size=1, ext=4 - msubi xmmh, xmmh, xmmhm, size=1, ext=4 + msubi xmml, xmml, xmmlm, size=1, ext = "2 |" + Signed + msubi xmmh, xmmh, xmmhm, size=1, ext = "2 |" + Signed }; def macroop PSUBSB_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", 
dataSize=8 - msubi xmml, xmml, ufp1, size=1, ext=4 - msubi xmmh, xmmh, ufp2, size=1, ext=4 + msubi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed + msubi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed }; def macroop PSUBSB_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - msubi xmml, xmml, ufp1, size=1, ext=4 - msubi xmmh, xmmh, ufp2, size=1, ext=4 + msubi xmml, xmml, ufp1, size=1, ext = "2 |" + Signed + msubi xmmh, xmmh, ufp2, size=1, ext = "2 |" + Signed }; def macroop PSUBSW_XMM_XMM { - msubi xmml, xmml, xmmlm, size=2, ext=4 - msubi xmmh, xmmh, xmmhm, size=2, ext=4 + msubi xmml, xmml, xmmlm, size=2, ext = "2 |" + Signed + msubi xmmh, xmmh, xmmhm, size=2, ext = "2 |" + Signed }; def macroop PSUBSW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - msubi xmml, xmml, ufp1, size=2, ext=4 - msubi xmmh, xmmh, ufp2, size=2, ext=4 + msubi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed + msubi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed }; def macroop PSUBSW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - msubi xmml, xmml, ufp1, size=2, ext=4 - msubi xmmh, xmmh, ufp2, size=2, ext=4 + msubi xmml, xmml, ufp1, size=2, ext = "2 |" + Signed + msubi xmmh, xmmh, ufp2, size=2, ext = "2 |" + Signed }; def macroop PSUBUSB_XMM_XMM { diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py index d3bfbb529..6610e0690 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py @@ -75,23 +75,23 @@ def macroop PMINUB_XMM_P { }; def macroop PMINSW_XMM_XMM { - mmini xmml, xmml, xmmlm, size=2, ext=2 - mmini xmmh, xmmh, 
xmmhm, size=2, ext=2 + mmini xmml, xmml, xmmlm, size=2, ext=Signed + mmini xmmh, xmmh, xmmhm, size=2, ext=Signed }; def macroop PMINSW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmini xmml, xmml, ufp1, size=2, ext=2 - mmini xmmh, xmmh, ufp2, size=2, ext=2 + mmini xmml, xmml, ufp1, size=2, ext=Signed + mmini xmmh, xmmh, ufp2, size=2, ext=Signed }; def macroop PMINSW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmini xmml, xmml, ufp1, size=2, ext=2 - mmini xmmh, xmmh, ufp2, size=2, ext=2 + mmini xmml, xmml, ufp1, size=2, ext=Signed + mmini xmmh, xmmh, ufp2, size=2, ext=Signed }; def macroop PMAXUB_XMM_XMM { @@ -115,22 +115,22 @@ def macroop PMAXUB_XMM_P { }; def macroop PMAXSW_XMM_XMM { - mmaxi xmml, xmml, xmmlm, size=2, ext=2 - mmaxi xmmh, xmmh, xmmhm, size=2, ext=2 + mmaxi xmml, xmml, xmmlm, size=2, ext=Signed + mmaxi xmmh, xmmh, xmmhm, size=2, ext=Signed }; def macroop PMAXSW_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - mmaxi xmml, xmml, ufp1, size=2, ext=2 - mmaxi xmmh, xmmh, ufp2, size=2, ext=2 + mmaxi xmml, xmml, ufp1, size=2, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed }; def macroop PMAXSW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - mmaxi xmml, xmml, ufp1, size=2, ext=2 - mmaxi xmmh, xmmh, ufp2, size=2, ext=2 + mmaxi xmml, xmml, ufp1, size=2, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed }; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py index 8d632a0ac..080be66f6 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py +++ 
b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py @@ -56,33 +56,33 @@ microcode = ''' def macroop CVTSI2SS_XMM_R { mov2fp ufp1, regm, destSize=dsz, srcSize=dsz - cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1 + cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=Scalar }; def macroop CVTSI2SS_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1 + cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=Scalar }; def macroop CVTSI2SS_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1 + cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=Scalar }; def macroop CVTSI2SD_XMM_R { mov2fp ufp1, regm, destSize=dsz, srcSize=dsz - cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1 + cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=Scalar }; def macroop CVTSI2SD_XMM_M { ldfp ufp1, seg, sib, disp, dataSize=8 - cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1 + cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=Scalar }; def macroop CVTSI2SD_XMM_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1 + cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=Scalar }; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py index 9112a7382..7afee6cbf 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py @@ -55,45 +55,45 @@ microcode = ''' def macroop PACKSSDW_XMM_XMM { - pack ufp1, xmml, xmmh, ext=1, srcSize=4, destSize=2 - pack xmmh, xmmlm, xmmhm, ext=1, srcSize=4, destSize=2 + pack ufp1, xmml, xmmh, ext=Signed, srcSize=4, destSize=2 + pack xmmh, xmmlm, xmmhm, ext=Signed, srcSize=4, destSize=2 movfp xmml, ufp1, dataSize=8 }; def macroop PACKSSDW_XMM_M { ldfp ufp1, seg, 
sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2 - pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2 + pack xmml, xmml, xmmh, ext=Signed, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=Signed, srcSize=4, destSize=2 }; def macroop PACKSSDW_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2 - pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2 + pack xmml, xmml, xmmh, ext=Signed, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=Signed, srcSize=4, destSize=2 }; def macroop PACKSSWB_XMM_XMM { - pack ufp1, xmml, xmmh, ext=1, srcSize=2, destSize=1 - pack xmmh, xmmlm, xmmhm, ext=1, srcSize=2, destSize=1 + pack ufp1, xmml, xmmh, ext=Signed, srcSize=2, destSize=1 + pack xmmh, xmmlm, xmmhm, ext=Signed, srcSize=2, destSize=1 movfp xmml, ufp1, dataSize=8 }; def macroop PACKSSWB_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1 - pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1 + pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 }; def macroop PACKSSWB_XMM_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1 - pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1 + pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 }; def macroop PACKUSWB_XMM_XMM { @@ -105,8 +105,8 @@ def macroop PACKUSWB_XMM_XMM { def macroop PACKUSWB_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1 - pack xmmh, ufp1, ufp2, 
ext=0, srcSize=2, destSize=1 + pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 }; def macroop PACKUSWB_XMM_P { diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py index b663d15b7..d376dccce 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py @@ -115,33 +115,33 @@ def macroop PADDQ_MMX_P { }; def macroop PADDSB_MMX_MMX { - maddi mmx, mmx, mmxm, size=1, ext=4 + maddi mmx, mmx, mmxm, size=1, ext = "2 |" + Signed }; def macroop PADDSB_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - maddi mmx, mmx, ufp1, size=1, ext=4 + maddi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed }; def macroop PADDSB_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - maddi mmx, mmx, ufp1, size=1, ext=4 + maddi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed }; def macroop PADDSW_MMX_MMX { - maddi mmx, mmx, mmxm, size=2, ext=4 + maddi mmx, mmx, mmxm, size=2, ext = "2 |" + Signed }; def macroop PADDSW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - maddi mmx, mmx, ufp1, size=2, ext=4 + maddi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed }; def macroop PADDSW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - maddi mmx, mmx, ufp1, size=2, ext=4 + maddi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed }; def macroop PADDUSB_MMX_MMX { diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py index 7383a744f..526162e32 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py @@ -55,77 +55,77 @@ microcode = ''' def macroop PMULHW_MMX_MMX { - mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x8) + mmuli mmx, mmx, mmxm, size=2, ext = Signed + "|" + MultHi }; def macroop PMULHW_MMX_M 
{ ldfp ufp1, seg, sib, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8) + mmuli mmx, mmx, ufp1, size=2, ext = Signed + "|" + MultHi }; def macroop PMULHW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8) + mmuli mmx, mmx, ufp1, size=2, ext = Signed + "|" + MultHi }; def macroop PMULLW_MMX_MMX { - mmuli mmx, mmx, mmxm, size=2, ext=2 + mmuli mmx, mmx, mmxm, size=2, ext = Signed }; def macroop PMULLW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=2 + mmuli mmx, mmx, ufp1, size=2, ext = Signed }; def macroop PMULLW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=2 + mmuli mmx, mmx, ufp1, size=2, ext = Signed }; def macroop PMULHRW_MMX_MMX { - mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x4 | 0x8) + mmuli mmx, mmx, mmxm, size=2, ext = Signed + "| 0x4 |" + MultHi }; def macroop PMULHRW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8) + mmuli mmx, mmx, ufp1, size=2, ext = Signed + "| 0x4 |" + MultHi }; def macroop PMULHRW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8) + mmuli mmx, mmx, ufp1, size=2, ext = Signed + "| 0x4 |" + MultHi }; def macroop PMULHUW_MMX_MMX { - mmuli mmx, mmx, mmxm, size=2, ext=8 + mmuli mmx, mmx, mmxm, size=2, ext = MultHi }; def macroop PMULHUW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=8 + mmuli mmx, mmx, ufp1, size=2, ext = MultHi }; def macroop PMULHUW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmuli mmx, mmx, ufp1, size=2, ext=8 + mmuli mmx, mmx, ufp1, size=2, ext = MultHi }; def macroop PMULUDQ_MMX_MMX { - mmuli mmx, mmx, mmxm, srcSize=4, destSize=8, ext=1 + mmuli mmx, mmx, mmxm, srcSize=4, destSize=8, ext=Scalar }; def macroop PMULUDQ_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, 
ext=1 + mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=Scalar }; def macroop PMULUDQ_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1 + mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=Scalar }; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py index f6940d159..354cf8722 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py @@ -55,23 +55,23 @@ microcode = ''' def macroop PMADDWD_MMX_MMX { - mmuli ufp3, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, mmx, mmxm, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, mmx, mmxm, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi mmx, ufp3, ufp4, size=4, ext=0 }; def macroop PMADDWD_MMX_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 - mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi mmx, ufp3, ufp4, size=4, ext=0 }; def macroop PMADDWD_MMX_P { rdip t7 ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 - mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) - mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10 | 0x20" + mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext = Signed + "| 0x10" maddi mmx, ufp3, ufp4, size=4, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py index a60c0b1a8..4ee87e0f8 100644 --- 
a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py @@ -115,33 +115,33 @@ def macroop PSUBQ_MMX_P { }; def macroop PSUBSB_MMX_MMX { - msubi mmx, mmx, mmxm, size=1, ext=4 + msubi mmx, mmx, mmxm, size=1, ext = "2 |" + Signed }; def macroop PSUBSB_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - msubi mmx, mmx, ufp1, size=1, ext=4 + msubi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed }; def macroop PSUBSB_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - msubi mmx, mmx, ufp1, size=1, ext=4 + msubi mmx, mmx, ufp1, size=1, ext = "2 |" + Signed }; def macroop PSUBSW_MMX_MMX { - msubi mmx, mmx, mmxm, size=2, ext=4 + msubi mmx, mmx, mmxm, size=2, ext = "2 |" + Signed }; def macroop PSUBSW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - msubi mmx, mmx, ufp1, size=2, ext=4 + msubi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed }; def macroop PSUBSW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - msubi mmx, mmx, ufp1, size=2, ext=4 + msubi mmx, mmx, ufp1, size=2, ext = "2 |" + Signed }; def macroop PSUBUSB_MMX_MMX { diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py index 8d8247300..c2eedbb0e 100644 --- a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py @@ -70,18 +70,18 @@ def macroop PMINUB_MMX_P { }; def macroop PMINSW_MMX_MMX { - mmini mmx, mmx, mmxm, size=2, ext=2 + mmini mmx, mmx, mmxm, size=2, ext=Signed }; def macroop PMINSW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmini mmx, mmx, ufp1, size=2, ext=2 + mmini mmx, mmx, ufp1, size=2, ext=Signed }; def macroop PMINSW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmini mmx, mmx, ufp1, size=2, ext=2 + mmini mmx, mmx, ufp1, size=2, ext=Signed }; 
def macroop PMAXUB_MMX_MMX { @@ -100,17 +100,17 @@ def macroop PMAXUB_MMX_P { }; def macroop PMAXSW_MMX_MMX { - mmaxi mmx, mmx, mmxm, size=2, ext=2 + mmaxi mmx, mmx, mmxm, size=2, ext=Signed }; def macroop PMAXSW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - mmaxi mmx, mmx, ufp1, size=2, ext=2 + mmaxi mmx, mmx, ufp1, size=2, ext=Signed }; def macroop PMAXSW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - mmaxi mmx, mmx, ufp1, size=2, ext=2 + mmaxi mmx, mmx, ufp1, size=2, ext=Signed }; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py index 4235d7f26..cb8b4eaa7 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py @@ -55,33 +55,33 @@ microcode = ''' def macroop PACKSSDW_MMX_MMX { - pack mmx, mmx, mmxm, ext=1, srcSize=4, destSize=2 + pack mmx, mmx, mmxm, ext=Signed, srcSize=4, destSize=2 }; def macroop PACKSSDW_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2 + pack mmx, mmx, ufp1, ext=Signed, srcSize=4, destSize=2 }; def macroop PACKSSDW_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2 + pack mmx, mmx, ufp1, ext=Signed, srcSize=4, destSize=2 }; def macroop PACKSSWB_MMX_MMX { - pack mmx, mmx, mmxm, ext=1, srcSize=2, destSize=1 + pack mmx, mmx, mmxm, ext=Signed, srcSize=2, destSize=1 }; def macroop PACKSSWB_MMX_M { ldfp ufp1, seg, sib, disp, dataSize=8 - pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1 + pack mmx, mmx, ufp1, ext=Signed, srcSize=2, destSize=1 }; def macroop PACKSSWB_MMX_P { rdip t7 ldfp ufp1, seg, riprel, disp, dataSize=8 - pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1 + pack mmx, mmx, ufp1, ext=Signed, srcSize=2, destSize=1 }; def macroop PACKUSWB_MMX_MMX { diff --git 
a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 25b58dfb7..b0b557521 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -181,6 +181,9 @@ let {{ 'kernel_gs_base'): assembler.symbols[reg] = regIdx("MISCREG_%s" % reg.upper()) + for flag in ('Scalar', 'MultHi', 'Signed'): + assembler.symbols[flag] = 'Media%sOp' % flag + # Code literal which forces a default 64 bit operand size in 64 bit mode. assembler.symbols["oszIn64Override"] = ''' if (machInst.mode.submode == SixtyFourBitMode && diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa index 4052f254d..900c166f8 100644 --- a/src/arch/x86/isa/microops/mediaop.isa +++ b/src/arch/x86/isa/microops/mediaop.isa @@ -352,7 +352,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -451,14 +451,14 @@ let {{ // Handle saturation. if (signBit) { if (overflow != mask(destBits - srcBits + 1)) { - if (ext & 0x1) + if (signedOp()) picked = (ULL(1) << (destBits - 1)); else picked = 0; } } else { if (overflow != 0) { - if (ext & 0x1) + if (signedOp()) picked = mask(destBits - 1); else picked = mask(destBits); @@ -479,14 +479,14 @@ let {{ // Handle saturation. if (signBit) { if (overflow != mask(destBits - srcBits + 1)) { - if (ext & 0x1) + if (signedOp()) picked = (ULL(1) << (destBits - 1)); else picked = 0; } } else { if (overflow != 0) { - if (ext & 0x1) + if (signedOp()) picked = mask(destBits - 1); else picked = mask(destBits); @@ -545,7 +545,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -595,7 +595,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -634,7 +634,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -648,7 +648,7 @@ let {{ (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); uint64_t resBits; - if (ext & 0x2) { + if (signedOp()) { if (arg1 < arg2) { resBits = arg1Bits; } else { @@ -672,7 +672,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -686,7 +686,7 @@ let {{ (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); uint64_t resBits; - if (ext & 0x2) { + if (signedOp()) { if (arg1 > arg2) { resBits = arg1Bits; } else { @@ -725,7 +725,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -766,7 +766,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -812,7 +812,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -858,7 +858,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -904,7 +904,7 @@ let {{ int size = srcSize; int sizeBits = size * 8; assert(srcSize == 4 || srcSize == 8); - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -938,7 +938,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -949,17 +949,19 @@ let {{ uint64_t resBits = arg1Bits + arg2Bits; if (ext & 0x2) { - if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) - resBits = mask(sizeBits); - } else if (ext & 0x4) { - int arg1Sign = bits(arg1Bits, sizeBits - 1); - int arg2Sign = bits(arg2Bits, sizeBits - 1); - int resSign = bits(resBits, sizeBits - 1); - if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { - if (resSign == 0) - resBits = (ULL(1) << (sizeBits - 1)); - else - resBits = mask(sizeBits - 1); + if (signedOp()) { + int arg1Sign = bits(arg1Bits, sizeBits - 1); + int arg2Sign = bits(arg2Bits, sizeBits - 1); + int resSign = bits(resBits, sizeBits - 1); + if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { + if (resSign == 0) + resBits = (ULL(1) << (sizeBits - 1)); + else + resBits = mask(sizeBits - 1); + } + } else { + if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) + resBits = mask(sizeBits); } } @@ -973,7 +975,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -984,21 +986,23 @@ let {{ uint64_t resBits = arg1Bits - arg2Bits; if (ext & 0x2) { - if (arg2Bits > arg1Bits) { - resBits = 0; - } else if (!findCarry(sizeBits, resBits, - arg1Bits, ~arg2Bits)) { - resBits = mask(sizeBits); - } - } else if (ext & 0x4) { - int arg1Sign = bits(arg1Bits, sizeBits - 1); - int arg2Sign = !bits(arg2Bits, sizeBits - 1); - int resSign = bits(resBits, sizeBits - 1); - if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { - if (resSign == 0) - resBits = (ULL(1) << (sizeBits - 1)); - else - resBits = mask(sizeBits - 1); + if (signedOp()) { + int arg1Sign = bits(arg1Bits, sizeBits - 1); + int arg2Sign = !bits(arg2Bits, sizeBits - 1); + int resSign = bits(resBits, sizeBits - 1); + if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { + if (resSign == 0) + resBits = (ULL(1) << (sizeBits - 1)); + else + resBits = mask(sizeBits - 1); + } + } else { + if (arg2Bits > arg1Bits) { + resBits = 0; + } else if (!findCarry(sizeBits, resBits, + arg1Bits, ~arg2Bits)) { + resBits = mask(sizeBits); + } } } @@ -1013,7 +1017,7 @@ let {{ int destBits = destSize * 8; assert(destBits <= 64); assert(destSize >= srcSize); - int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / destSize); + int items = numItems(destSize); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -1030,7 +1034,7 @@ let {{ uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); uint64_t resBits; - if (ext & 0x2) { + if (signedOp()) { int64_t arg1 = arg1Bits | (0 - (arg1Bits & (ULL(1) << (srcBits - 1)))); int64_t arg2 = arg2Bits | @@ -1043,7 +1047,7 @@ let {{ if (ext & 0x4) resBits += (ULL(1) << (destBits - 1)); - if (ext & 0x8) + if (multHi()) resBits >>= destBits; int destHiIndex = (i + 1) * destBits - 1; @@ -1058,7 +1062,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -1098,7 +1102,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t shiftAmt = op2.uqw; uint64_t result = FpDestReg.uqw; @@ -1125,7 +1129,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t shiftAmt = op2.uqw; uint64_t result = FpDestReg.uqw; @@ -1156,7 +1160,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t shiftAmt = op2.uqw; uint64_t result = FpDestReg.uqw; @@ -1201,15 +1205,15 @@ let {{ int srcStart = 0; int destStart = 0; if (srcSize == 2 * destSize) { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + items = numItems(srcSize); if (ext & 0x2) destStart = destSizeBits * items; } else if (destSize == 2 * srcSize) { - items = (ext & 0x1) ? 
1: sizeof(FloatRegBits) / destSize; + items = numItems(destSize); if (ext & 0x2) srcStart = srcSizeBits * items; } else { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + items = numItems(destSize); } uint64_t result = FpDestReg.uqw; @@ -1273,15 +1277,15 @@ let {{ int srcStart = 0; int destStart = 0; if (srcSize == 2 * destSize) { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + items = numItems(srcSize); if (ext & 0x2) destStart = destSizeBits * items; } else if (destSize == 2 * srcSize) { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + items = numItems(destSize); if (ext & 0x2) srcStart = srcSizeBits * items; } else { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + items = numItems(destSize); } uint64_t result = FpDestReg.uqw; @@ -1334,15 +1338,15 @@ let {{ int srcStart = 0; int destStart = 0; if (srcSize == 2 * destSize) { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + items = numItems(srcSize); if (ext & 0x2) destStart = destSizeBits * items; } else if (destSize == 2 * srcSize) { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + items = numItems(destSize); if (ext & 0x2) srcStart = srcSizeBits * items; } else { - items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + items = numItems(destSize); } uint64_t result = FpDestReg.uqw; @@ -1393,7 +1397,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { @@ -1432,7 +1436,7 @@ let {{ assert(srcSize == destSize); int size = srcSize; int sizeBits = size * 8; - int items = (ext & 0x8) ? 
1: (sizeof(FloatRegBits) / size); + int items = numItems(size); uint64_t result = FpDestReg.uqw; for (int i = 0; i < items; i++) { diff --git a/src/cpu/base.hh b/src/cpu/base.hh index bfeec0870..b229ddd38 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -274,7 +274,7 @@ class BaseCPU : public MemObject */ virtual BranchPred *getBranchPred() { return NULL; }; - virtual Counter totalInstructions() const { return 0; } + virtual Counter totalInstructions() const = 0; // Function tracing private: diff --git a/src/dev/x86/I82094AA.py b/src/dev/x86/I82094AA.py index 5476becc6..d4ab2cb17 100644 --- a/src/dev/x86/I82094AA.py +++ b/src/dev/x86/I82094AA.py @@ -38,6 +38,8 @@ class I82094AA(BasicPioDevice): pio_latency = Param.Latency('1ns', "Programmed IO latency in simticks") pio_addr = Param.Addr("Device address") int_port = Port("Port for sending and receiving interrupt messages") + int_latency = Param.Latency('1ns', \ + "Latency for an interrupt to propagate through this device.") external_int_pic = Param.I8259(NULL, "External PIC, if any") def pin(self, line): diff --git a/src/dev/x86/i82094aa.cc b/src/dev/x86/i82094aa.cc index 591fee6a4..65b3ee732 100644 --- a/src/dev/x86/i82094aa.cc +++ b/src/dev/x86/i82094aa.cc @@ -36,7 +36,8 @@ #include "mem/packet_access.hh" #include "sim/system.hh" -X86ISA::I82094AA::I82094AA(Params *p) : PioDevice(p), IntDev(this), +X86ISA::I82094AA::I82094AA(Params *p) : PioDevice(p), + IntDev(this, p->int_latency), latency(p->pio_latency), pioAddr(p->pio_addr), extIntPic(p->external_int_pic), lowestPriorityOffset(0) { diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 429928c79..2397a17c5 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -266,7 +266,8 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, return false; } - blk = tags->accessBlock(pkt->getAddr(), lat); + int id = pkt->req->hasContextId() ? 
pkt->req->contextId() : -1; + blk = tags->accessBlock(pkt->getAddr(), lat, id); DPRINTF(Cache, "%s%s %x %s\n", pkt->cmdString(), pkt->req->isInstFetch() ? " (ifetch)" : "", @@ -299,7 +300,8 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, incMissCount(pkt); return false; } - tags->insertBlock(pkt->getAddr(), blk); + int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + tags->insertBlock(pkt->getAddr(), blk, id); blk->status = BlkValid | BlkReadable; } std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize); @@ -976,7 +978,8 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk, tempBlock->tag = tags->extractTag(addr); DPRINTF(Cache, "using temp block for %x\n", addr); } else { - tags->insertBlock(addr, blk); + int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + tags->insertBlock(pkt->getAddr(), blk, id); } } else { // existing block... probably an upgrade diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 122e6e14b..808f9e25a 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -154,7 +154,7 @@ FALRU::invalidateBlk(FALRU::BlkType *blk) } FALRUBlk* -FALRU::accessBlock(Addr addr, int &lat, int *inCache) +FALRU::accessBlock(Addr addr, int &lat, int context_src, int *inCache) { accesses++; int tmp_in_cache = 0; @@ -228,7 +228,7 @@ FALRU::findVictim(Addr addr, PacketList &writebacks) } void -FALRU::insertBlock(Addr addr, FALRU::BlkType *blk) +FALRU::insertBlock(Addr addr, FALRU::BlkType *blk, int context_src) { } diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index 4e6bccc1d..b20d25d2b 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -182,7 +182,7 @@ public: * @param inCache The FALRUBlk::inCache flags. * @return Pointer to the cache block. 
*/ - FALRUBlk* accessBlock(Addr addr, int &lat, int *inCache = 0); + FALRUBlk* accessBlock(Addr addr, int &lat, int context_src, int *inCache = 0); /** * Find the block in the cache, do not update the replacement data. @@ -200,7 +200,7 @@ public: */ FALRUBlk* findVictim(Addr addr, PacketList & writebacks); - void insertBlock(Addr addr, BlkType *blk); + void insertBlock(Addr addr, BlkType *blk, int context_src); /** * Return the hit latency of this cache. diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index b9ba5256b..a8ef4e6fb 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -219,7 +219,7 @@ IIC::regStats(const string &name) IICTag* -IIC::accessBlock(Addr addr, int &lat) +IIC::accessBlock(Addr addr, int &lat, int context_src) { Addr tag = extractTag(addr); unsigned set = hash(addr); @@ -338,7 +338,7 @@ IIC::findVictim(Addr addr, PacketList &writebacks) } void -IIC::insertBlock(Addr addr, BlkType* blk) +IIC::insertBlock(Addr addr, BlkType* blk, int context_src) { } diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh index 994f7b8f7..c96cdaf3e 100644 --- a/src/mem/cache/tags/iic.hh +++ b/src/mem/cache/tags/iic.hh @@ -422,7 +422,7 @@ class IIC : public BaseTags * @param lat The access latency. * @return A pointer to the block found, if any. */ - IICTag* accessBlock(Addr addr, int &lat); + IICTag* accessBlock(Addr addr, int &lat, int context_src); /** * Find the block, do not update the replacement data. @@ -440,7 +440,7 @@ class IIC : public BaseTags */ IICTag* findVictim(Addr addr, PacketList &writebacks); - void insertBlock(Addr addr, BlkType *blk); + void insertBlock(Addr addr, BlkType *blk, int context_src); /** * Called at end of simulation to complete average block reference stats. 
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 9371f193a..81d82c231 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -150,7 +150,7 @@ LRU::~LRU() } LRUBlk* -LRU::accessBlock(Addr addr, int &lat) +LRU::accessBlock(Addr addr, int &lat, int context_src) { Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -200,7 +200,7 @@ LRU::findVictim(Addr addr, PacketList &writebacks) } void -LRU::insertBlock(Addr addr, LRU::BlkType *blk) +LRU::insertBlock(Addr addr, LRU::BlkType *blk, int context_src) { if (!blk->isTouched) { tagsInUse++; diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index 2874d8f1f..ecd6e861f 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -172,7 +172,7 @@ public: * @param lat The access latency. * @return Pointer to the cache block if found. */ - LRUBlk* accessBlock(Addr addr, int &lat); + LRUBlk* accessBlock(Addr addr, int &lat, int context_src); /** * Finds the given address in the cache, do not update replacement data. @@ -197,7 +197,7 @@ public: * @param addr The address to update. * @param blk The block to update. */ - void insertBlock(Addr addr, BlkType *blk); + void insertBlock(Addr addr, BlkType *blk, int context_src); /** * Generate the tag from the given address. 
diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index 4bc3a4434..bcaf5582a 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -222,6 +222,6 @@ PageTable::unserialize(Checkpoint *cp, const std::string &section) entry->unserialize(cp, csprintf("%s.Entry%d", process->name(), i)); pTable[vaddr] = *entry; ++i; - } + } } diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 121a6e447..081fbb4cb 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -540,12 +540,8 @@ PhysicalMemory::unserialize(Checkpoint *cp, const string &section) /* Only copy bytes that are non-zero, so we don't give the VM system hell */ while (curSize < params()->range.size()) { bytesRead = gzread(compressedMem, tempPage, chunkSize); - if (bytesRead != chunkSize && - bytesRead != params()->range.size() - curSize) - fatal("Read failed on physical memory checkpoint file '%s'" - " got %d bytes, expected %d or %d bytes\n", - filename, bytesRead, chunkSize, - params()->range.size() - curSize); + if (bytesRead == 0) + break; assert(bytesRead % sizeof(long) == 0); diff --git a/src/sim/faults.cc b/src/sim/faults.cc index 0fe853785..6149a8335 100644 --- a/src/sim/faults.cc +++ b/src/sim/faults.cc @@ -40,7 +40,7 @@ #if !FULL_SYSTEM void FaultBase::invoke(ThreadContext * tc) { - fatal("fault (%s) detected @ PC %p", name(), tc->readPC()); + panic("fault (%s) detected @ PC %p", name(), tc->readPC()); } #else void FaultBase::invoke(ThreadContext * tc) @@ -54,7 +54,7 @@ void FaultBase::invoke(ThreadContext * tc) void UnimpFault::invoke(ThreadContext * tc) { - fatal("Unimpfault: %s\n", panicStr.c_str()); + panic("Unimpfault: %s\n", panicStr.c_str()); } #if !FULL_SYSTEM diff --git a/src/sim/process.cc b/src/sim/process.cc index 343d2ad5a..957c3cc3e 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -507,6 +507,7 @@ Process::serialize(std::ostream &os) nameOut(os, csprintf("%s.FdMap%d", name(), x)); fd_map[x].serialize(os); } + SERIALIZE_SCALAR(M5_pid); } @@ -528,6 +529,11 @@ 
Process::unserialize(Checkpoint *cp, const std::string &section) fd_map[x].unserialize(cp, csprintf("%s.FdMap%d", section, x)); } fix_file_offsets(); + UNSERIALIZE_OPT_SCALAR(M5_pid); + // The above returns a bool so that you could do something if you don't + // find the param in the checkpoint if you wanted to, like set a default + // but in this case we'll just stick with the instantianted value if not + // found. checkpointRestored = true; diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 5ae9128e5..0e6d9b254 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -204,6 +204,18 @@ paramIn(Checkpoint *cp, const string &section, const string &name, T &param) } } +template <class T> +bool +optParamIn(Checkpoint *cp, const string &section, const string &name, T &param) +{ + string str; + if (!cp->find(section, name, str) || !parseParam(str, param)) { + warn("optional parameter %s:%s not present\n", section, name); + return false; + } else { + return true; + } +} template <class T> void @@ -322,6 +334,9 @@ paramOut(ostream &os, const string &name, type const &param); \ template void \ paramIn(Checkpoint *cp, const string &section, \ const string &name, type & param); \ +template bool \ +optParamIn(Checkpoint *cp, const string &section, \ + const string &name, type & param); \ template void \ arrayParamOut(ostream &os, const string &name, \ type const *param, unsigned size); \ @@ -422,7 +437,7 @@ Serializable::serializeAll(const string &cpt_dir) time_t t = time(NULL); if (!outstream.is_open()) fatal("Unable to open file %s for writing\n", cpt_file.c_str()); - outstream << "// checkpoint generated: " << ctime(&t); + outstream << "## checkpoint generated: " << ctime(&t); globals.serialize(outstream); SimObject::serializeAll(outstream); diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 08240c0c0..cf1a672be 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -58,6 +58,10 @@ void paramIn(Checkpoint *cp, const std::string &section, const std::string &name, T &param); 
template <class T> +bool optParamIn(Checkpoint *cp, const std::string &section, + const std::string &name, T &param); + +template <class T> void arrayParamOut(std::ostream &os, const std::string &name, const T *param, unsigned size); @@ -85,6 +89,7 @@ objParamIn(Checkpoint *cp, const std::string &section, #define SERIALIZE_SCALAR(scalar) paramOut(os, #scalar, scalar) #define UNSERIALIZE_SCALAR(scalar) paramIn(cp, section, #scalar, scalar) +#define UNSERIALIZE_OPT_SCALAR(scalar) optParamIn(cp, section, #scalar, scalar) // ENUMs are like SCALARs, but we cast them to ints on the way out #define SERIALIZE_ENUM(scalar) paramOut(os, #scalar, (int)scalar) diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini index cf8b99da8..78a86bf82 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini @@ -132,7 +132,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -167,7 +166,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -202,7 +200,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 @@ -244,7 +241,7 @@ egid=100 env= errout=cerr euid=100 -executable=tests/test-progs/hello/bin/mips/linux/hello +executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello gid=100 input=cin max_stack_size=67108864 diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout index f04692a1f..581c531f6 100755 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout +++ 
b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 24 2009 12:19:09 -M5 revision 9bc3e4611009+ 6661+ default tip -M5 started Sep 24 2009 12:19:46 -M5 executing on zooks -command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing +M5 compiled Jan 2 2010 07:01:31 +M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch +M5 started Jan 2 2010 07:03:09 +M5 executing on fajita +command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! -Exiting @ tick 29521500 because target called exit() +Exiting @ tick 29940500 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt index a47f185bc..d55c721ca 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt @@ -1,99 +1,99 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 29581 # Simulator instruction rate (inst/s) -host_mem_usage 155804 # Number of bytes of host memory used -host_seconds 0.19 # Real time elapsed on the host -host_tick_rate 153369596 # Simulator tick rate (ticks/s) +host_inst_rate 10400 # Simulator instruction rate (inst/s) +host_mem_usage 205896 # Number of bytes of host memory used +host_seconds 0.56 # Real time elapsed on the host +host_tick_rate 53415864 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 5685 # Number of 
instructions simulated +sim_insts 5827 # Number of instructions simulated sim_seconds 0.000030 # Number of seconds simulated -sim_ticks 29521500 # Number of ticks simulated -system.cpu.AGEN-Unit.instReqsProcessed 2058 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.instReqsProcessed 5686 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 789 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 96 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 5686 # Number of Instructions Requests that completed in this resource. -system.cpu.Execution-Unit.instReqsProcessed 3624 # Number of Instructions Requests that completed in this resource. -system.cpu.Execution-Unit.predictedNotTakenIncorrect 516 # Number of Branches Incorrectly Predicted As Not Taken). -system.cpu.Execution-Unit.predictedTakenIncorrect 34 # Number of Branches Incorrectly Predicted As Taken. +sim_ticks 29940500 # Number of ticks simulated +system.cpu.AGEN-Unit.instReqsProcessed 2090 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.predictedNotTaken 826 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 90 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.instReqsProcessed 3734 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.predictedNotTakenIncorrect 541 # Number of Branches Incorrectly Predicted As Not Taken). +system.cpu.Execution-Unit.predictedTakenIncorrect 35 # Number of Branches Incorrectly Predicted As Taken. 
system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 11373 # Number of Instructions Requests that completed in this resource. -system.cpu.Graduation-Unit.instReqsProcessed 5685 # Number of Instructions Requests that completed in this resource. +system.cpu.Fetch-Seq-Unit.instReqsProcessed 11657 # Number of Instructions Requests that completed in this resource. +system.cpu.Graduation-Unit.instReqsProcessed 5827 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 1 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 8 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 3 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 10479 # Number of Instructions Requests that completed in this resource. 
-system.cpu.committedInsts 5685 # Number of Instructions Simulated (Per-Thread) -system.cpu.committedInsts_total 5685 # Number of Instructions Simulated (Total) -system.cpu.cpi 10.385928 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 10.385928 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1134 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56207.317073 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53207.317073 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1052 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 4609000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.072310 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 82 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 4363000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.072310 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 82 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 924 # number of WriteReq accesses(hits+misses) +system.cpu.RegFile-Manager.instReqsProcessed 10713 # Number of Instructions Requests that completed in this resource. 
+system.cpu.committedInsts 5827 # Number of Instructions Simulated (Per-Thread) +system.cpu.committedInsts_total 5827 # Number of Instructions Simulated (Total) +system.cpu.cpi 10.276643 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 10.276643 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1165 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 56201.149425 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53201.149425 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1078 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 4889500 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.074678 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 87 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 4628500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.074678 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 87 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_avg_miss_latency 56554.687500 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53554.687500 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 860 # number of WriteReq hits +system.cpu.dcache.WriteReq_hits 861 # number of WriteReq hits system.cpu.dcache.WriteReq_miss_latency 3619500 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.069264 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_miss_rate 0.069189 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 64 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_miss_latency 3427500 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.069264 # mshr miss rate for WriteReq accesses 
+system.cpu.dcache.WriteReq_mshr_miss_rate 0.069189 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 14.590909 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 14.144928 # Average number of references to valid blocks. system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2058 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56359.589041 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53359.589041 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1912 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 8228500 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.070943 # miss rate for demand accesses -system.cpu.dcache.demand_misses 146 # number of demand (read+write) misses +system.cpu.dcache.demand_accesses 2090 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 56350.993377 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1939 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 8509000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.072249 
# miss rate for demand accesses +system.cpu.dcache.demand_misses 151 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 7790500 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.070943 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 146 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 8056000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.072249 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 151 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2058 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56359.589041 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53359.589041 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2090 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 56350.993377 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1912 # number of overall hits -system.cpu.dcache.overall_miss_latency 8228500 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.070943 # miss rate for overall accesses -system.cpu.dcache.overall_misses 146 # number of overall misses +system.cpu.dcache.overall_hits 1939 # number of overall hits +system.cpu.dcache.overall_miss_latency 8509000 
# number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.072249 # miss rate for overall accesses +system.cpu.dcache.overall_misses 151 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 7790500 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.070943 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 146 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 8056000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.072249 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 151 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.replacements 0 # number of replacements -system.cpu.dcache.sampled_refs 132 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 84.209307 # Cycle average of tags in use -system.cpu.dcache.total_refs 1926 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 88.212490 # Cycle average of tags in use +system.cpu.dcache.total_refs 1952 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.dcache_port.instReqsProcessed 2057 # Number of Instructions Requests that completed in this resource. +system.cpu.dcache_port.instReqsProcessed 2089 # Number of Instructions Requests that completed in this resource. 
system.cpu.dtb.accesses 0 # DTB accesses system.cpu.dtb.hits 0 # DTB hits system.cpu.dtb.misses 0 # DTB misses @@ -103,62 +103,62 @@ system.cpu.dtb.read_misses 0 # DT system.cpu.dtb.write_accesses 0 # DTB write accesses system.cpu.dtb.write_hits 0 # DTB write hits system.cpu.dtb.write_misses 0 # DTB write misses -system.cpu.icache.ReadReq_accesses 5687 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55773.026316 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 52773.026316 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 5383 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 16955000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.053455 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 304 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 16043000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.053455 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 304 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_accesses 5829 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 55765.676568 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 52765.676568 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 5526 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 16897000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.051981 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 303 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 15988000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.051981 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 303 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # 
average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 17.707237 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 18.237624 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 5687 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55773.026316 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 52773.026316 # average overall mshr miss latency -system.cpu.icache.demand_hits 5383 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 16955000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.053455 # miss rate for demand accesses -system.cpu.icache.demand_misses 304 # number of demand (read+write) misses +system.cpu.icache.demand_accesses 5829 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 55765.676568 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency +system.cpu.icache.demand_hits 5526 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 16897000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.051981 # miss rate for demand accesses +system.cpu.icache.demand_misses 303 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits 
-system.cpu.icache.demand_mshr_miss_latency 16043000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.053455 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 304 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_miss_latency 15988000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.051981 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 303 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 5687 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55773.026316 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 52773.026316 # average overall mshr miss latency +system.cpu.icache.overall_accesses 5829 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 55765.676568 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 5383 # number of overall hits -system.cpu.icache.overall_miss_latency 16955000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.053455 # miss rate for overall accesses -system.cpu.icache.overall_misses 304 # number of overall misses +system.cpu.icache.overall_hits 5526 # number of overall hits +system.cpu.icache.overall_miss_latency 16897000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.051981 # miss rate for overall accesses +system.cpu.icache.overall_misses 303 # number of overall misses 
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 16043000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.053455 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 304 # number of overall MSHR misses +system.cpu.icache.overall_mshr_miss_latency 15988000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.051981 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 303 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 13 # number of replacements -system.cpu.icache.sampled_refs 304 # Sample count of references to valid blocks. +system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 136.385131 # Cycle average of tags in use -system.cpu.icache.total_refs 5383 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 134.267603 # Cycle average of tags in use +system.cpu.icache.total_refs 5526 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 5686 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.096284 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.096284 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. 
+system.cpu.ipc 0.097308 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.097308 # IPC: Total IPC of All Threads system.cpu.itb.accesses 0 # DTB accesses system.cpu.itb.hits 0 # DTB hits system.cpu.itb.misses 0 # DTB misses @@ -168,83 +168,83 @@ system.cpu.itb.read_misses 0 # DT system.cpu.itb.write_accesses 0 # DTB write accesses system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses -system.cpu.l2cache.ReadExReq_accesses 50 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_accesses 51 # number of ReadExReq accesses(hits+misses) system.cpu.l2cache.ReadExReq_avg_miss_latency 52500 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40080 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 2625000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40098.039216 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 2677500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses -system.cpu.l2cache.ReadExReq_misses 50 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 2004000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_misses 51 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 2045000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses -system.cpu.l2cache.ReadExReq_mshr_misses 50 # number of ReadExReq MSHR misses -system.cpu.l2cache.ReadReq_accesses 386 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52052.083333 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40026.041667 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadExReq_mshr_misses 51 # number of ReadExReq MSHR misses 
+system.cpu.l2cache.ReadReq_accesses 390 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 52052.835052 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40023.195876 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 19988000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 0.994819 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 384 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 15370000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994819 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 384 # number of ReadReq MSHR misses -system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52535.714286 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40071.428571 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 735500 # number of UpgradeReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 20196500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.994872 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 388 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 15529000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994872 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 388 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 13 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52538.461538 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40076.923077 # average UpgradeReq mshr miss latency 
+system.cpu.l2cache.UpgradeReq_miss_latency 683000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses -system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 561000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_misses 13 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 521000 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses -system.cpu.l2cache.UpgradeReq_mshr_misses 14 # number of UpgradeReq MSHR misses +system.cpu.l2cache.UpgradeReq_mshr_misses 13 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0.005405 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.005333 # Average number of references to valid blocks. 
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 436 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52103.686636 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40032.258065 # average overall mshr miss latency +system.cpu.l2cache.demand_accesses 441 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 52104.783599 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 22613000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 0.995413 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 434 # number of demand (read+write) misses +system.cpu.l2cache.demand_miss_latency 22874000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.995465 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 439 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 17374000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 0.995413 # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 434 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_miss_latency 17574000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 
0.995465 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 439 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 436 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52103.686636 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40032.258065 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 441 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 52104.783599 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 2 # number of overall hits -system.cpu.l2cache.overall_miss_latency 22613000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 0.995413 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 434 # number of overall misses +system.cpu.l2cache.overall_miss_latency 22874000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.995465 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 439 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 17374000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 0.995413 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 434 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_miss_latency 17574000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.995465 # 
mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 439 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 370 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 375 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 183.672228 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 185.807591 # Cycle average of tags in use system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 59044 # number of cpu cycles simulated +system.cpu.numCycles 59882 # number of cpu cycles simulated system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 59044 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) -system.cpu.workload.PROG:num_syscalls 13 # Number of system calls +system.cpu.threadCycles 59882 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini index b3bdddcfe..962f6ed05 100644 --- a/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini +++ 
b/tests/quick/00.hello/ref/mips/linux/o3-timing/config.ini @@ -163,7 +163,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -335,7 +334,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -370,7 +368,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -412,7 +409,7 @@ egid=100 env= errout=cerr euid=100 -executable=tests/test-progs/hello/bin/mips/linux/hello +executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello gid=100 input=cin max_stack_size=67108864 diff --git a/tests/quick/00.hello/ref/mips/linux/o3-timing/simout b/tests/quick/00.hello/ref/mips/linux/o3-timing/simout index 9562c954f..74dedc1d0 100755 --- a/tests/quick/00.hello/ref/mips/linux/o3-timing/simout +++ b/tests/quick/00.hello/ref/mips/linux/o3-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 24 2009 12:19:09 -M5 revision 9bc3e4611009+ 6661+ default tip -M5 started Sep 24 2009 12:19:46 -M5 executing on zooks -command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/o3-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/o3-timing +M5 compiled Jan 2 2010 07:01:31 +M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch +M5 started Jan 2 2010 07:03:10 +M5 executing on fajita +command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/o3-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/o3-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! 
-Exiting @ tick 13914500 because target called exit() +Exiting @ tick 14060500 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt index bdce7b5d3..85a5a75dd 100644 --- a/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/o3-timing/stats.txt @@ -1,127 +1,127 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 59567 # Simulator instruction rate (inst/s) -host_mem_usage 155776 # Number of bytes of host memory used -host_seconds 0.09 # Real time elapsed on the host -host_tick_rate 163592222 # Simulator tick rate (ticks/s) +host_inst_rate 48407 # Simulator instruction rate (inst/s) +host_mem_usage 206048 # Number of bytes of host memory used +host_seconds 0.11 # Real time elapsed on the host +host_tick_rate 131379529 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 5049 # Number of instructions simulated +sim_insts 5169 # Number of instructions simulated sim_seconds 0.000014 # Number of seconds simulated -sim_ticks 13914500 # Number of ticks simulated +sim_ticks 14060500 # Number of ticks simulated system.cpu.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -system.cpu.BPredUnit.BTBHits 552 # Number of BTB hits -system.cpu.BPredUnit.BTBLookups 1939 # Number of BTB lookups -system.cpu.BPredUnit.RASInCorrect 53 # Number of incorrect RAS predictions. -system.cpu.BPredUnit.condIncorrect 722 # Number of conditional branches incorrect -system.cpu.BPredUnit.condPredicted 1555 # Number of conditional branches predicted -system.cpu.BPredUnit.lookups 2357 # Number of BP lookups -system.cpu.BPredUnit.usedRAS 387 # Number of times the RAS was used to get a target. 
-system.cpu.commit.COM:branches 885 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 63 # number cycles where commit BW limit reached +system.cpu.BPredUnit.BTBHits 572 # Number of BTB hits +system.cpu.BPredUnit.BTBLookups 1960 # Number of BTB lookups +system.cpu.BPredUnit.RASInCorrect 66 # Number of incorrect RAS predictions. +system.cpu.BPredUnit.condIncorrect 751 # Number of conditional branches incorrect +system.cpu.BPredUnit.condPredicted 1593 # Number of conditional branches predicted +system.cpu.BPredUnit.lookups 2416 # Number of BP lookups +system.cpu.BPredUnit.usedRAS 404 # Number of times the RAS was used to get a target. +system.cpu.commit.COM:branches 916 # Number of branches committed +system.cpu.commit.COM:bw_lim_events 65 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits -system.cpu.commit.COM:committed_per_cycle::samples 14230 # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::mean 0.399438 # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::stdev 1.125719 # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::samples 14561 # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::mean 0.400110 # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::stdev 1.121131 # Number of insts commited each cycle system.cpu.commit.COM:committed_per_cycle::underflows 0 0.00% 0.00% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::0-1 11753 82.59% 82.59% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::1-2 1168 8.21% 90.80% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::2-3 499 3.51% 94.31% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::3-4 284 2.00% 96.30% # Number of insts commited each cycle 
-system.cpu.commit.COM:committed_per_cycle::4-5 291 2.04% 98.35% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::5-6 72 0.51% 98.85% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::6-7 62 0.44% 99.29% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::7-8 38 0.27% 99.56% # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::8 63 0.44% 100.00% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::0-1 11999 82.41% 82.41% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::1-2 1213 8.33% 90.74% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::2-3 529 3.63% 94.37% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::3-4 291 2.00% 96.37% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::4-5 294 2.02% 98.39% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::5-6 71 0.49% 98.87% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::6-7 62 0.43% 99.30% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::7-8 37 0.25% 99.55% # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle::8 65 0.45% 100.00% # Number of insts commited each cycle system.cpu.commit.COM:committed_per_cycle::overflows 0 0.00% 100.00% # Number of insts commited each cycle system.cpu.commit.COM:committed_per_cycle::min_value 0 # Number of insts commited each cycle system.cpu.commit.COM:committed_per_cycle::max_value 8 # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle::total 14230 # Number of insts commited each cycle -system.cpu.commit.COM:count 5684 # Number of instructions committed -system.cpu.commit.COM:loads 1133 # Number of loads committed 
+system.cpu.commit.COM:committed_per_cycle::total 14561 # Number of insts commited each cycle +system.cpu.commit.COM:count 5826 # Number of instructions committed +system.cpu.commit.COM:loads 1164 # Number of loads committed system.cpu.commit.COM:membars 0 # Number of memory barriers committed -system.cpu.commit.COM:refs 2057 # Number of memory references committed +system.cpu.commit.COM:refs 2089 # Number of memory references committed system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 605 # The number of times a branch was mispredicted -system.cpu.commit.commitCommittedInsts 5684 # The number of committed instructions -system.cpu.commit.commitNonSpecStalls 15 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 5973 # The number of squashed insts skipped by commit -system.cpu.committedInsts 5049 # Number of Instructions Simulated -system.cpu.committedInsts_total 5049 # Number of Instructions Simulated -system.cpu.cpi 5.511983 # CPI: Cycles Per Instruction -system.cpu.cpi_total 5.511983 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 2297 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 34007.812500 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 36022.988506 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 2169 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 4353000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.055725 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 128 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 41 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 3134000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.037875 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 
87 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 924 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 27701.724138 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 36093.750000 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 634 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 8033500 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.313853 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 290 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_hits 226 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 2310000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.069264 # mshr miss rate for WriteReq accesses +system.cpu.commit.branchMispredicts 620 # The number of times a branch was mispredicted +system.cpu.commit.commitCommittedInsts 5826 # The number of committed instructions +system.cpu.commit.commitNonSpecStalls 10 # The number of times commit has been forced to stall to communicate backwards +system.cpu.commit.commitSquashedInsts 6017 # The number of squashed insts skipped by commit +system.cpu.committedInsts 5169 # Number of Instructions Simulated +system.cpu.committedInsts_total 5169 # Number of Instructions Simulated +system.cpu.cpi 5.440511 # CPI: Cycles Per Instruction +system.cpu.cpi_total 5.440511 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2321 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 34074.626866 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 36043.956044 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2187 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 4566000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.057734 # miss rate for ReadReq accesses 
+system.cpu.dcache.ReadReq_misses 134 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 43 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 3280000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.039207 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 91 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 27570.707071 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 36046.875000 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 628 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 8188500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.321081 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 297 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 233 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 2307000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.069189 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 20.889706 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 20.226950 # Average number of references to valid blocks. 
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 3221 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 29632.775120 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 36052.980132 # average overall mshr miss latency -system.cpu.dcache.demand_hits 2803 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 12386500 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.129773 # miss rate for demand accesses -system.cpu.dcache.demand_misses 418 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 267 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 5444000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.046880 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 151 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_accesses 3246 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 29592.807425 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 36045.161290 # average overall mshr miss latency +system.cpu.dcache.demand_hits 2815 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 12754500 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.132779 # miss rate for demand accesses +system.cpu.dcache.demand_misses 431 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 276 # number of demand (read+write) MSHR hits 
+system.cpu.dcache.demand_mshr_miss_latency 5587000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.047751 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 155 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 3221 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 29632.775120 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 36052.980132 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 3246 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 29592.807425 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 36045.161290 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 2803 # number of overall hits -system.cpu.dcache.overall_miss_latency 12386500 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.129773 # miss rate for overall accesses -system.cpu.dcache.overall_misses 418 # number of overall misses -system.cpu.dcache.overall_mshr_hits 267 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 5444000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.046880 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 151 # number of overall MSHR misses +system.cpu.dcache.overall_hits 2815 # number of overall hits +system.cpu.dcache.overall_miss_latency 12754500 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.132779 # miss rate for overall accesses 
+system.cpu.dcache.overall_misses 431 # number of overall misses +system.cpu.dcache.overall_mshr_hits 276 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 5587000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.047751 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 155 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.replacements 0 # number of replacements -system.cpu.dcache.sampled_refs 136 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 141 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 87.690614 # Cycle average of tags in use -system.cpu.dcache.total_refs 2841 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 91.308954 # Cycle average of tags in use +system.cpu.dcache.total_refs 2852 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 479 # Number of cycles decode is blocked -system.cpu.decode.DECODE:BranchMispred 128 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 128 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 14211 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 9912 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 3839 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 1056 # Number of cycles decode is squashing -system.cpu.decode.DECODE:SquashedInsts 251 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:BlockedCycles 519 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 139 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 139 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 14436 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 10077 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 3965 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 1080 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 267 # Number of squashed instructions handled by decode system.cpu.dtb.accesses 0 # DTB accesses system.cpu.dtb.hits 0 # DTB hits system.cpu.dtb.misses 0 # DTB misses @@ -131,116 +131,116 @@ system.cpu.dtb.read_misses 0 # DT system.cpu.dtb.write_accesses 0 # DTB write accesses system.cpu.dtb.write_hits 0 # DTB write hits system.cpu.dtb.write_misses 0 # DTB write misses -system.cpu.fetch.Branches 2357 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 2171 # Number of cache lines fetched -system.cpu.fetch.Cycles 6187 # Number of cycles fetch has run and was not squashing or blocked 
-system.cpu.fetch.IcacheSquashes 360 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 15337 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 738 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.084693 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 2171 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 939 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 0.551096 # Number of inst fetches per cycle -system.cpu.fetch.rateDist::samples 15286 # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::mean 1.003336 # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::stdev 2.263199 # Number of instructions fetched each cycle (Total) +system.cpu.fetch.Branches 2416 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 2220 # Number of cache lines fetched +system.cpu.fetch.Cycles 6371 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 355 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 15622 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 767 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.085911 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 2220 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 976 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 0.555508 # Number of inst fetches per cycle +system.cpu.fetch.rateDist::samples 15641 # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::mean 0.998785 # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::stdev 2.252974 # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::underflows 0 
0.00% 0.00% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::0-1 11277 73.77% 73.77% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::1-2 1770 11.58% 85.35% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::2-3 198 1.30% 86.65% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::3-4 138 0.90% 87.55% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::4-5 316 2.07% 89.62% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::5-6 114 0.75% 90.36% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::6-7 306 2.00% 92.37% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::7-8 249 1.63% 93.99% # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::8 918 6.01% 100.00% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::0-1 11507 73.57% 73.57% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::1-2 1847 11.81% 85.38% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::2-3 223 1.43% 86.80% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::3-4 141 0.90% 87.71% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::4-5 312 1.99% 89.70% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::5-6 120 0.77% 90.47% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::6-7 308 1.97% 92.44% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::7-8 254 1.62% 94.06% # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist::8 929 5.94% 100.00% # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::overflows 0 0.00% 100.00% # Number of instructions fetched each cycle (Total) 
system.cpu.fetch.rateDist::min_value 0 # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist::max_value 8 # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist::total 15286 # Number of instructions fetched each cycle (Total) -system.cpu.icache.ReadReq_accesses 2171 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 35436.489607 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 34915.151515 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 1738 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 15344000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.199447 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 433 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 103 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 11522000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.152004 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 330 # number of ReadReq MSHR misses +system.cpu.fetch.rateDist::total 15641 # Number of instructions fetched each cycle (Total) +system.cpu.icache.ReadReq_accesses 2220 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 35681.279621 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 34902.735562 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 1798 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 15057500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.190090 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 422 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 93 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 11483000 # number of ReadReq MSHR miss cycles 
+system.cpu.icache.ReadReq_mshr_miss_rate 0.148198 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 329 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 5.266667 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 5.465046 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 2171 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 35436.489607 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 34915.151515 # average overall mshr miss latency -system.cpu.icache.demand_hits 1738 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 15344000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.199447 # miss rate for demand accesses -system.cpu.icache.demand_misses 433 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 103 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 11522000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.152004 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 330 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_accesses 2220 # number of demand (read+write) accesses 
+system.cpu.icache.demand_avg_miss_latency 35681.279621 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 34902.735562 # average overall mshr miss latency +system.cpu.icache.demand_hits 1798 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 15057500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.190090 # miss rate for demand accesses +system.cpu.icache.demand_misses 422 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 93 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 11483000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.148198 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 329 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 2171 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 35436.489607 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 34915.151515 # average overall mshr miss latency +system.cpu.icache.overall_accesses 2220 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 35681.279621 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 34902.735562 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 1738 # number of overall hits -system.cpu.icache.overall_miss_latency 15344000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.199447 # miss rate for overall accesses -system.cpu.icache.overall_misses 433 # number of 
overall misses -system.cpu.icache.overall_mshr_hits 103 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 11522000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.152004 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 330 # number of overall MSHR misses +system.cpu.icache.overall_hits 1798 # number of overall hits +system.cpu.icache.overall_miss_latency 15057500 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.190090 # miss rate for overall accesses +system.cpu.icache.overall_misses 422 # number of overall misses +system.cpu.icache.overall_mshr_hits 93 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 11483000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.148198 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 329 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 16 # number of replacements -system.cpu.icache.sampled_refs 330 # Sample count of references to valid blocks. +system.cpu.icache.sampled_refs 329 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 159.086288 # Cycle average of tags in use -system.cpu.icache.total_refs 1738 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 156.015053 # Cycle average of tags in use +system.cpu.icache.total_refs 1798 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 12544 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 1216 # Number of branches executed -system.cpu.iew.EXEC:nop 1820 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.292239 # Inst execution rate -system.cpu.iew.EXEC:refs 3432 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 1048 # Number of stores executed +system.cpu.idleCycles 12481 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 1253 # Number of branches executed +system.cpu.iew.EXEC:nop 1830 # number of nop insts executed +system.cpu.iew.EXEC:rate 0.295249 # Inst execution rate +system.cpu.iew.EXEC:refs 3456 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 1049 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed -system.cpu.iew.WB:consumers 4040 # num instructions consuming a value -system.cpu.iew.WB:count 7355 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.694802 # average fanout of values written-back +system.cpu.iew.WB:consumers 4132 # num instructions consuming a value +system.cpu.iew.WB:count 7536 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.703291 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 2807 # num instructions producing a value -system.cpu.iew.WB:rate 0.264283 # insts written-back per cycle -system.cpu.iew.WB:sent 7444 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 663 # Number of branch mispredicts detected at execute -system.cpu.iew.iewBlockCycles 8 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 2795 # Number of dispatched 
load instructions -system.cpu.iew.iewDispNonSpecInsts 15 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 968 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 1158 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 11660 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 2384 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 531 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 8133 # Number of executed instructions +system.cpu.iew.WB:producers 2906 # num instructions producing a value +system.cpu.iew.WB:rate 0.267975 # insts written-back per cycle +system.cpu.iew.WB:sent 7618 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 681 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 0 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 2806 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 12 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 963 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 1159 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 11847 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 2407 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 549 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 8303 # Number of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle -system.cpu.iew.iewLSQFullEvents 1 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 1056 # Number of cycles IEW is squashing +system.cpu.iew.iewLSQFullEvents 
0 # Number of times the LSQ has become full, causing a stall +system.cpu.iew.iewSquashCycles 1080 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked @@ -250,68 +250,69 @@ system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Nu system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address system.cpu.iew.lsq.thread.0.memOrderViolation 22 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 1662 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedLoads 1642 # Number of loads squashed system.cpu.iew.lsq.thread.0.squashedStores 234 # Number of stores squashed system.cpu.iew.memOrderViolationEvents 22 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 277 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 386 # Number of branches that were predicted taken incorrectly -system.cpu.ipc 0.181423 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.181423 # IPC: Total IPC of All Threads +system.cpu.iew.predictedNotTakenIncorrect 272 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 409 # Number of branches that were predicted taken incorrectly +system.cpu.ipc 0.183806 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.183806 # IPC: Total IPC of All Threads system.cpu.iq.ISSUE:FU_type_0::No_OpClass 0 0.00% 0.00% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::IntAlu 5020 57.94% 57.94% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::IntMult 5 0.06% 58.00% # Type of FU issued 
-system.cpu.iq.ISSUE:FU_type_0::IntDiv 2 0.02% 58.02% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::FloatAdd 2 0.02% 58.04% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::FloatCmp 0 0.00% 58.04% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::FloatCvt 0 0.00% 58.04% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::FloatMult 0 0.00% 58.04% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::FloatDiv 0 0.00% 58.04% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::FloatSqrt 0 0.00% 58.04% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::MemRead 2572 29.69% 87.73% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::MemWrite 1063 12.27% 100.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::IntAlu 5184 58.56% 58.56% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::IntMult 5 0.06% 58.62% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::IntDiv 2 0.02% 58.64% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::FloatAdd 2 0.02% 58.66% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::FloatCmp 0 0.00% 58.66% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::FloatCvt 0 0.00% 58.66% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::FloatMult 0 0.00% 58.66% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::FloatDiv 0 0.00% 58.66% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::FloatSqrt 0 0.00% 58.66% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::MemRead 2595 29.32% 87.98% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::MemWrite 1064 12.02% 100.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued -system.cpu.iq.ISSUE:FU_type_0::total 8664 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0::total 8852 # Type of FU issued system.cpu.iq.ISSUE:fu_busy_cnt 162 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.018698 # FU busy rate (busy events/executed inst) 
+system.cpu.iq.ISSUE:fu_busy_rate 0.018301 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full::No_OpClass 0 0.00% 0.00% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::IntAlu 10 6.17% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::IntMult 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::IntDiv 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::FloatAdd 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::FloatCmp 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::FloatCvt 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::FloatMult 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::FloatDiv 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::FloatSqrt 0 0.00% 6.17% # attempts to use FU when none available -system.cpu.iq.ISSUE:fu_full::MemRead 98 60.49% 66.67% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::IntAlu 8 4.94% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::IntMult 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::IntDiv 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::FloatAdd 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::FloatCmp 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::FloatCvt 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::FloatMult 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::FloatDiv 0 0.00% 4.94% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full::FloatSqrt 0 0.00% 4.94% # attempts to use FU when none available 
+system.cpu.iq.ISSUE:fu_full::MemRead 100 61.73% 66.67% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full::MemWrite 54 33.33% 100.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available -system.cpu.iq.ISSUE:issued_per_cycle::samples 15286 # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::mean 0.566793 # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::stdev 1.217668 # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::samples 15641 # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::mean 0.565948 # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::stdev 1.209939 # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle::underflows 0 0.00% 0.00% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::0-1 11421 74.72% 74.72% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::1-2 1678 10.98% 85.69% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::2-3 792 5.18% 90.87% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::3-4 722 4.72% 95.60% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::4-5 333 2.18% 97.78% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::5-6 200 1.31% 99.08% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::6-7 91 0.60% 99.68% # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::7-8 34 0.22% 99.90% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::0-1 11653 74.50% 74.50% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::1-2 1757 11.23% 85.74% # Number of insts issued each cycle 
+system.cpu.iq.ISSUE:issued_per_cycle::2-3 814 5.20% 90.94% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::3-4 738 4.72% 95.66% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::4-5 342 2.19% 97.85% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::5-6 199 1.27% 99.12% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::6-7 91 0.58% 99.70% # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle::7-8 32 0.20% 99.90% # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle::8 15 0.10% 100.00% # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle::overflows 0 0.00% 100.00% # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle::min_value 0 # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle::max_value 8 # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle::total 15286 # Number of insts issued each cycle -system.cpu.iq.ISSUE:rate 0.311319 # Inst issue rate -system.cpu.iq.iqInstsAdded 9825 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 8664 # Number of instructions issued -system.cpu.iq.iqNonSpecInstsAdded 15 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 4207 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedInstsIssued 30 # Number of squashed instructions issued -system.cpu.iq.iqSquashedOperandsExamined 2761 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.iq.ISSUE:issued_per_cycle::total 15641 # Number of insts issued each cycle +system.cpu.iq.ISSUE:rate 0.314771 # Inst issue rate +system.cpu.iq.iqInstsAdded 10005 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 8852 # Number of instructions issued 
+system.cpu.iq.iqNonSpecInstsAdded 12 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 4214 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 36 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 2 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 2725 # Number of squashed operands that are examined and possibly removed from graph system.cpu.itb.accesses 0 # DTB accesses system.cpu.itb.hits 0 # DTB hits system.cpu.itb.misses 0 # DTB misses @@ -321,98 +322,98 @@ system.cpu.itb.read_misses 0 # DT system.cpu.itb.write_accesses 0 # DTB write accesses system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses -system.cpu.l2cache.ReadExReq_accesses 49 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 34704.081633 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 31408.163265 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 1700500 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_accesses 50 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 34680 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 31360 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 1734000 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses -system.cpu.l2cache.ReadExReq_misses 49 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 1539000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_misses 50 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 1568000 # number of ReadExReq MSHR miss cycles 
system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses -system.cpu.l2cache.ReadExReq_mshr_misses 49 # number of ReadExReq MSHR misses -system.cpu.l2cache.ReadReq_accesses 417 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 34307.506053 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 31130.750605 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadExReq_mshr_misses 50 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 420 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 34317.307692 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 31138.221154 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 4 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 14169000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 0.990408 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 413 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 12857000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.990408 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 413 # number of ReadReq MSHR misses -system.cpu.l2cache.UpgradeReq_accesses 15 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 34400 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 31166.666667 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 516000 # number of UpgradeReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 14276000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.990476 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 416 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 12953500 # number of 
ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.990476 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 416 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 34428.571429 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 31178.571429 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 482000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses -system.cpu.l2cache.UpgradeReq_misses 15 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 467500 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 436500 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses -system.cpu.l2cache.UpgradeReq_mshr_misses 15 # number of UpgradeReq MSHR misses +system.cpu.l2cache.UpgradeReq_mshr_misses 14 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0.010050 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.009950 # Average number of references to valid blocks. 
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 466 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 34349.567100 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 31160.173160 # average overall mshr miss latency +system.cpu.l2cache.demand_accesses 470 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 34356.223176 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 31162.017167 # average overall mshr miss latency system.cpu.l2cache.demand_hits 4 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 15869500 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 0.991416 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 462 # number of demand (read+write) misses +system.cpu.l2cache.demand_miss_latency 16010000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.991489 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 466 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 14396000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 0.991416 # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 462 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_miss_latency 14521500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 
0.991489 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 466 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 466 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 34349.567100 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 31160.173160 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 470 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 34356.223176 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 31162.017167 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 4 # number of overall hits -system.cpu.l2cache.overall_miss_latency 15869500 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 0.991416 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 462 # number of overall misses +system.cpu.l2cache.overall_miss_latency 16010000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.991489 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 466 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 14396000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 0.991416 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 462 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_miss_latency 14521500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.991489 # 
mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 466 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 398 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 402 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 209.158769 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 210.151573 # Cycle average of tags in use system.cpu.l2cache.total_refs 4 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.memDep0.conflictingLoads 5 # Number of conflicting loads. system.cpu.memDep0.conflictingStores 2 # Number of conflicting stores. -system.cpu.memDep0.insertedLoads 2795 # Number of loads inserted to the mem dependence unit. -system.cpu.memDep0.insertedStores 1158 # Number of stores inserted to the mem dependence unit. 
-system.cpu.numCycles 27830 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 20 # Number of cycles rename is blocking -system.cpu.rename.RENAME:CommittedMaps 3323 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 10291 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 16 # Number of times rename has blocked due to LSQ full -system.cpu.rename.RENAME:RenameLookups 15666 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 13454 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 8251 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 3462 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 1056 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 29 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 4928 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 428 # count of cycles rename stalled for serializing inst -system.cpu.rename.RENAME:serializingInsts 20 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 125 # count of insts added to the skid buffer -system.cpu.rename.RENAME:tempSerializingInsts 14 # count of temporary serializing insts renamed -system.cpu.timesIdled 250 # Number of times that the entire CPU went into an idle state and unscheduled itself -system.cpu.workload.PROG:num_syscalls 13 # Number of system calls +system.cpu.memDep0.insertedLoads 2806 # Number of loads inserted to the mem dependence unit. +system.cpu.memDep0.insertedStores 1159 # Number of stores inserted to the mem dependence unit. 
+system.cpu.numCycles 28122 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 5 # Number of cycles rename is blocking +system.cpu.rename.RENAME:CommittedMaps 3410 # Number of HB maps that are committed +system.cpu.rename.RENAME:IdleCycles 10468 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 9 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:RenameLookups 15900 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 13681 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 8420 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 3575 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 1080 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 19 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 5010 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 494 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 17 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 111 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 11 # count of temporary serializing insts renamed +system.cpu.timesIdled 249 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini index cae17207c..eb0e10d29 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/config.ini @@ -135,7 +135,7 @@ port=system.physmem.port[0] 
system.cpu.icache_port system.cpu.dcache_port [system.physmem] type=RubyMemory clock=1 -config_file=build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic-ruby/ruby.config +config_file=build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic-ruby/ruby.config debug=false debug_file=ruby.debug file= @@ -143,8 +143,10 @@ latency=30000 latency_var=0 null=false num_cpus=1 +num_dmas=1 phase=0 -range=0:134217727 +ports_per_core=2 +range=0:1073741823 stats_file=ruby.stats zero=false port=system.membus.port[0] diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr index aece78b32..9a6ce1210 100755 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simerr @@ -1,4 +1,4 @@ -["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024"] +["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024", "-C", "32768", "-A", "8", "-D", "1"] print config: 1 warn: Sockets disabled, not accepting gdb connections For more information see: http://www.m5sim.org/warn/d946bea6 diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout index 7408d6fc9..7ac0ea8eb 100755 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Oct 6 2009 20:51:47 -M5 revision 300266bf68ec+ 6674+ default tip -M5 started Oct 6 2009 20:51:48 -M5 executing on zooks -command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic-ruby -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic-ruby +M5 compiled Jan 2 2010 07:01:31 +M5 
revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch +M5 started Jan 2 2010 07:03:09 +M5 executing on fajita +command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic-ruby -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic-ruby Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! -Exiting @ tick 2842500 because target called exit() +Exiting @ tick 2913500 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt index 94d67cedd..3e60620c5 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic-ruby/stats.txt @@ -1,13 +1,13 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 27672 # Simulator instruction rate (inst/s) -host_mem_usage 1265116 # Number of bytes of host memory used -host_seconds 0.21 # Real time elapsed on the host -host_tick_rate 13820616 # Simulator tick rate (ticks/s) +host_inst_rate 57498 # Simulator instruction rate (inst/s) +host_mem_usage 2303472 # Number of bytes of host memory used +host_seconds 0.10 # Real time elapsed on the host +host_tick_rate 28699061 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 5685 # Number of instructions simulated +sim_insts 5827 # Number of instructions simulated sim_seconds 0.000003 # Number of seconds simulated -sim_ticks 2842500 # Number of ticks simulated +sim_ticks 2913500 # Number of ticks simulated system.cpu.dtb.accesses 0 # DTB accesses system.cpu.dtb.hits 0 # DTB hits system.cpu.dtb.misses 0 # DTB misses @@ -28,9 +28,9 @@ system.cpu.itb.write_accesses 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses 
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 5686 # number of cpu cycles simulated -system.cpu.num_insts 5685 # Number of instructions executed -system.cpu.num_refs 2058 # Number of memory references -system.cpu.workload.PROG:num_syscalls 13 # Number of system calls +system.cpu.numCycles 5828 # number of cpu cycles simulated +system.cpu.num_insts 5827 # Number of instructions executed +system.cpu.num_refs 2090 # Number of memory references +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini index 296171530..5d677c743 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini @@ -111,7 +111,7 @@ egid=100 env= errout=cerr euid=100 -executable=tests/test-progs/hello/bin/mips/linux/hello +executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello gid=100 input=cin max_stack_size=67108864 diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout b/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout index 77cc5d321..a364f6e08 100755 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 24 2009 12:19:09 -M5 revision 9bc3e4611009+ 6661+ default tip -M5 started Sep 24 2009 12:19:47 -M5 executing on zooks -command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic +M5 compiled Jan 2 2010 07:01:31 +M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch +M5 started Jan 2 2010 07:03:10 +M5 executing on fajita +command line: 
build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-atomic Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! -Exiting @ tick 2842500 because target called exit() +Exiting @ tick 2913500 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt index d36fc469a..090c28d32 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stats.txt @@ -1,13 +1,13 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 588083 # Simulator instruction rate (inst/s) -host_mem_usage 149516 # Number of bytes of host memory used +host_inst_rate 449580 # Simulator instruction rate (inst/s) +host_mem_usage 197348 # Number of bytes of host memory used host_seconds 0.01 # Real time elapsed on the host -host_tick_rate 285563593 # Simulator tick rate (ticks/s) +host_tick_rate 220987561 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 5685 # Number of instructions simulated +sim_insts 5827 # Number of instructions simulated sim_seconds 0.000003 # Number of seconds simulated -sim_ticks 2842500 # Number of ticks simulated +sim_ticks 2913500 # Number of ticks simulated system.cpu.dtb.accesses 0 # DTB accesses system.cpu.dtb.hits 0 # DTB hits system.cpu.dtb.misses 0 # DTB misses @@ -28,9 +28,9 @@ system.cpu.itb.write_accesses 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 5686 # number of cpu cycles simulated -system.cpu.num_insts 5685 # Number of instructions executed -system.cpu.num_refs 2058 # Number 
of memory references -system.cpu.workload.PROG:num_syscalls 13 # Number of system calls +system.cpu.numCycles 5828 # number of cpu cycles simulated +system.cpu.num_insts 5827 # Number of instructions executed +system.cpu.num_refs 2090 # Number of memory references +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini index 1562d7d6a..a290f96a4 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/config.ini @@ -132,7 +132,7 @@ port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port [system.physmem] type=RubyMemory clock=1 -config_file=build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing-ruby/ruby.config +config_file=build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing-ruby/ruby.config debug=false debug_file=ruby.debug file= @@ -140,8 +140,10 @@ latency=30000 latency_var=0 null=false num_cpus=1 +num_dmas=1 phase=0 -range=0:134217727 +ports_per_core=2 +range=0:1073741823 stats_file=ruby.stats zero=false port=system.membus.port[0] diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr index aece78b32..9a6ce1210 100755 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simerr @@ -1,4 +1,4 @@ -["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024"] +["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "1", "-m", "1", "-s", "1024", "-C", "32768", "-A", "8", "-D", "1"] print config: 1 warn: Sockets disabled, not accepting gdb connections For more information see: http://www.m5sim.org/warn/d946bea6 diff 
--git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout index 6c7350461..cf385d81f 100755 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Oct 6 2009 20:43:14 -M5 revision 300266bf68ec 6674 default tip -M5 started Oct 6 2009 20:47:38 -M5 executing on zooks -command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing-ruby -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing-ruby +M5 compiled Jan 2 2010 07:01:31 +M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch +M5 started Jan 2 2010 07:03:09 +M5 executing on fajita +command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing-ruby -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing-ruby Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! 
-Exiting @ tick 23227000 because target called exit() +Exiting @ tick 23749000 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt index 15c68a6b0..8a4afd8c9 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing-ruby/stats.txt @@ -1,13 +1,13 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 3701 # Simulator instruction rate (inst/s) -host_mem_usage 1265204 # Number of bytes of host memory used -host_seconds 1.54 # Real time elapsed on the host -host_tick_rate 15119806 # Simulator tick rate (ticks/s) +host_inst_rate 6560 # Simulator instruction rate (inst/s) +host_mem_usage 2303716 # Number of bytes of host memory used +host_seconds 0.89 # Real time elapsed on the host +host_tick_rate 26729951 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 5685 # Number of instructions simulated -sim_seconds 0.000023 # Number of seconds simulated -sim_ticks 23227000 # Number of ticks simulated +sim_insts 5827 # Number of instructions simulated +sim_seconds 0.000024 # Number of seconds simulated +sim_ticks 23749000 # Number of ticks simulated system.cpu.dtb.accesses 0 # DTB accesses system.cpu.dtb.hits 0 # DTB hits system.cpu.dtb.misses 0 # DTB misses @@ -28,9 +28,9 @@ system.cpu.itb.write_accesses 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 46454 # number of cpu cycles simulated -system.cpu.num_insts 5685 # Number of instructions executed -system.cpu.num_refs 2058 # Number of memory references -system.cpu.workload.PROG:num_syscalls 13 # Number of system calls +system.cpu.numCycles 47498 # number of cpu cycles simulated +system.cpu.num_insts 5827 # Number of instructions 
executed +system.cpu.num_refs 2090 # Number of memory references +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini index 2edca998b..3e36bc6f8 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini @@ -99,7 +99,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -134,7 +133,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -169,7 +167,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 @@ -211,7 +208,7 @@ egid=100 env= errout=cerr euid=100 -executable=tests/test-progs/hello/bin/mips/linux/hello +executable=/dist/m5/regression/test-progs/hello/bin/mips/linux/hello gid=100 input=cin max_stack_size=67108864 diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/simout b/tests/quick/00.hello/ref/mips/linux/simple-timing/simout index 15331f633..f5b9b6f90 100755 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/simout +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Sep 24 2009 12:19:09 -M5 revision 9bc3e4611009+ 6661+ default tip -M5 started Sep 24 2009 12:19:31 -M5 executing on zooks -command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing +M5 compiled Jan 2 2010 07:01:31 +M5 revision a538feb8a617 6813 
default qtip tip qbase fixhelp.patch +M5 started Jan 2 2010 07:03:09 +M5 executing on fajita +command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/simple-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! -Exiting @ tick 32409000 because target called exit() +Exiting @ tick 32803000 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt index 3bfaf3540..14247d496 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/stats.txt @@ -1,74 +1,74 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 303832 # Simulator instruction rate (inst/s) -host_mem_usage 155376 # Number of bytes of host memory used -host_seconds 0.02 # Real time elapsed on the host -host_tick_rate 1703674499 # Simulator tick rate (ticks/s) +host_inst_rate 21056 # Simulator instruction rate (inst/s) +host_mem_usage 204976 # Number of bytes of host memory used +host_seconds 0.28 # Real time elapsed on the host +host_tick_rate 118397165 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 5685 # Number of instructions simulated -sim_seconds 0.000032 # Number of seconds simulated -sim_ticks 32409000 # Number of ticks simulated -system.cpu.dcache.ReadReq_accesses 1133 # number of ReadReq accesses(hits+misses) +sim_insts 5827 # Number of instructions simulated +sim_seconds 0.000033 # Number of seconds simulated +sim_ticks 32803000 # Number of ticks simulated +system.cpu.dcache.ReadReq_accesses 1164 # number of ReadReq accesses(hits+misses) system.cpu.dcache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency 
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1051 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 4592000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.072374 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 82 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 4346000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.072374 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 82 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 924 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.ReadReq_hits 1077 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 4872000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.074742 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 87 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 4611000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.074742 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 87 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_avg_miss_latency 56000 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53000 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 860 # number of WriteReq hits +system.cpu.dcache.WriteReq_hits 861 # number of WriteReq hits system.cpu.dcache.WriteReq_miss_latency 3584000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.069264 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_miss_rate 0.069189 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 64 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_miss_latency 3392000 # 
number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.069264 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_miss_rate 0.069189 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 14.583333 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 14.137681 # Average number of references to valid blocks. system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2057 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses 2089 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency 56000 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1911 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 8176000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.070977 # miss rate for demand accesses -system.cpu.dcache.demand_misses 146 # number of demand (read+write) misses +system.cpu.dcache.demand_hits 1938 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 8456000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.072283 # miss rate for demand accesses +system.cpu.dcache.demand_misses 151 # 
number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 7738000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.070977 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 146 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 8003000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.072283 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 151 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2057 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses 2089 # number of overall (read+write) accesses system.cpu.dcache.overall_avg_miss_latency 56000 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1911 # number of overall hits -system.cpu.dcache.overall_miss_latency 8176000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.070977 # miss rate for overall accesses -system.cpu.dcache.overall_misses 146 # number of overall misses +system.cpu.dcache.overall_hits 1938 # number of overall hits +system.cpu.dcache.overall_miss_latency 8456000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.072283 # miss rate for overall accesses +system.cpu.dcache.overall_misses 151 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits 
-system.cpu.dcache.overall_mshr_miss_latency 7738000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.070977 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 146 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 8003000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.072283 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 151 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.replacements 0 # number of replacements -system.cpu.dcache.sampled_refs 132 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 83.830110 # Cycle average of tags in use -system.cpu.dcache.total_refs 1925 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 87.887695 # Cycle average of tags in use +system.cpu.dcache.total_refs 1951 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.dtb.accesses 0 # DTB accesses @@ -80,57 +80,57 @@ system.cpu.dtb.read_misses 0 # DT system.cpu.dtb.write_accesses 0 # DTB write accesses system.cpu.dtb.write_hits 0 # DTB write hits system.cpu.dtb.write_misses 0 # DTB write misses -system.cpu.icache.ReadReq_accesses 5687 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55723.684211 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 52723.684211 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 5383 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 16940000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.053455 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 304 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 16028000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.053455 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 304 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_accesses 5829 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 55722.772277 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 52722.772277 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 5526 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 16884000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.051981 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 303 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 15975000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.051981 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 303 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of 
cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 17.707237 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 18.237624 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 5687 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55723.684211 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 52723.684211 # average overall mshr miss latency -system.cpu.icache.demand_hits 5383 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 16940000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.053455 # miss rate for demand accesses -system.cpu.icache.demand_misses 304 # number of demand (read+write) misses +system.cpu.icache.demand_accesses 5829 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 55722.772277 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 52722.772277 # average overall mshr miss latency +system.cpu.icache.demand_hits 5526 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 16884000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.051981 # miss rate for demand accesses +system.cpu.icache.demand_misses 303 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 16028000 
# number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.053455 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 304 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_miss_latency 15975000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.051981 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 303 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 5687 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55723.684211 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 52723.684211 # average overall mshr miss latency +system.cpu.icache.overall_accesses 5829 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 55722.772277 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 52722.772277 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 5383 # number of overall hits -system.cpu.icache.overall_miss_latency 16940000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.053455 # miss rate for overall accesses -system.cpu.icache.overall_misses 304 # number of overall misses +system.cpu.icache.overall_hits 5526 # number of overall hits +system.cpu.icache.overall_miss_latency 16884000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.051981 # miss rate for overall accesses +system.cpu.icache.overall_misses 303 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall 
MSHR hits -system.cpu.icache.overall_mshr_miss_latency 16028000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.053455 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 304 # number of overall MSHR misses +system.cpu.icache.overall_mshr_miss_latency 15975000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.051981 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 303 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 13 # number of replacements -system.cpu.icache.sampled_refs 304 # Sample count of references to valid blocks. +system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 135.394401 # Cycle average of tags in use -system.cpu.icache.total_refs 5383 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 133.475693 # Cycle average of tags in use +system.cpu.icache.total_refs 5526 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0 # Percentage of idle cycles @@ -143,81 +143,81 @@ system.cpu.itb.read_misses 0 # DT system.cpu.itb.write_accesses 0 # DTB write accesses system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses -system.cpu.l2cache.ReadExReq_accesses 50 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_accesses 51 # number of ReadExReq accesses(hits+misses) system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 2600000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 2652000 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses -system.cpu.l2cache.ReadExReq_misses 50 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 2000000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_misses 51 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 2040000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses -system.cpu.l2cache.ReadExReq_mshr_misses 50 # number of ReadExReq MSHR misses -system.cpu.l2cache.ReadReq_accesses 386 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_mshr_misses 51 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 390 # number of ReadReq accesses(hits+misses) system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 19968000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 
0.994819 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 384 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 15360000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994819 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 384 # number of ReadReq MSHR misses -system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_miss_latency 20176000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.994872 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 388 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 15520000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994872 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 388 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 13 # number of UpgradeReq accesses(hits+misses) system.cpu.l2cache.UpgradeReq_avg_miss_latency 52000 # average UpgradeReq miss latency system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 728000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_latency 676000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses -system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 560000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_misses 13 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 520000 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses -system.cpu.l2cache.UpgradeReq_mshr_misses 14 # number of UpgradeReq MSHR misses 
+system.cpu.l2cache.UpgradeReq_mshr_misses 13 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0.005405 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.005333 # Average number of references to valid blocks. system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 436 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses 441 # number of demand (read+write) accesses system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 22568000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 0.995413 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 434 # number of demand (read+write) misses +system.cpu.l2cache.demand_miss_latency 22828000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.995465 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 439 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 17360000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 0.995413 # 
mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 434 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_miss_latency 17560000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.995465 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 439 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 436 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses 441 # number of overall (read+write) accesses system.cpu.l2cache.overall_avg_miss_latency 52000 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 40000 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 2 # number of overall hits -system.cpu.l2cache.overall_miss_latency 22568000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 0.995413 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 434 # number of overall misses +system.cpu.l2cache.overall_miss_latency 22828000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.995465 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 439 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 17360000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 0.995413 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 434 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_miss_latency 17560000 # number of overall MSHR miss 
cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.995465 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 439 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 370 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 375 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 182.412916 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 184.758016 # Cycle average of tags in use system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 64818 # number of cpu cycles simulated -system.cpu.num_insts 5685 # Number of instructions executed -system.cpu.num_refs 2058 # Number of memory references -system.cpu.workload.PROG:num_syscalls 13 # Number of system calls +system.cpu.numCycles 65606 # number of cpu cycles simulated +system.cpu.num_insts 5827 # Number of instructions executed +system.cpu.num_refs 2090 # Number of memory references +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/util/checkpoint-aggregator.py b/util/checkpoint-aggregator.py new file mode 100755 index 000000000..6e40db01e --- /dev/null +++ b/util/checkpoint-aggregator.py @@ -0,0 +1,167 @@ +# Copyright (c) 2009 The Regents of The University of Michigan +# All rights reserved. 
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Lisa Hsu

"""Merge several single-CPU checkpoints into one multi-CPU checkpoint.

For every checkpoint name given on the command line the script looks in
the current directory for an entry matching cpt.NAME.DIGITS, then:

  * copies each section whose name contains "cpu" into the merged
    m5.cpt, renaming "cpu" to "cpuN" (N = position on the command line)
    and rebasing "ppn" / "asn" options,
  * copies every other section (except "system" and "Globals") from the
    first checkpoint only,
  * appends the first page_ptr pages of each checkpoint's physical
    memory image to the merged, gzip-compressed memory image.

The result is written to ../PREFIX-cpt/cpt.NAME1-NAME2-....10000/.
"""

from ConfigParser import ConfigParser
import gzip

import sys, re, optparse, os

# Page counts are converted to bytes by shifting left by this amount,
# i.e. one page is 8 KB.
PAGE_SHIFT = 13


class myCP(ConfigParser):
    """ConfigParser whose option names keep their case.

    The stock parser lower-cases option names; overriding optionxform
    with the identity function preserves names such as "curTick".
    """

    def __init__(self):
        ConfigParser.__init__(self)

    def optionxform(self, optionstr):
        return optionstr


def _find_checkpoints(names):
    """Return one directory entry per name matching cpt.NAME.DIGITS.

    Only the current working directory is searched and the first match
    in listing order wins.  Prints a message and exits with status 1 as
    soon as a name has no match.
    """
    entries = os.listdir(os.getcwd())
    found = []
    for name in names:
        # Escape the dots and the user-supplied name so that they are
        # matched literally instead of being interpreted as regex syntax.
        pattern = re.compile(r"cpt\." + re.escape(name) + r"\.\d+")
        for entry in entries:
            if pattern.search(entry):
                found.append(entry)
                break
        else:
            print("missing checkpoint: " + name)
            sys.exit(1)
    return found


def _merge_cpu_section(config, merged, sec, index, page_base):
    """Copy CPU-related section `sec` of `config` into `merged`.

    The section is renamed by replacing "cpu" with "cpu<index>".
    Options are copied verbatim except:
      * "ppn" is offset by `page_base` when the section's "tag" is
        non-zero,
      * "asn" is replaced by `index` when the owning table has no "nlu"
        option, or when this entry's number is below the table's "nlu".
    """
    newsec = sec.replace("cpu", "cpu" + str(index))
    merged.add_section(newsec)
    if sec.endswith("workload"):
        # NOTE(review): if the source section already carries an M5_pid
        # option, the copy loop below overwrites this value -- confirm
        # that checkpoints do not serialize M5_pid here.
        merged.set(newsec, "M5_pid", index)

    for (key, value) in config.items(sec):
        if key == "ppn":
            if config.getint(sec, "tag") != 0:
                merged.set(newsec, key, int(value) + page_base)
                continue
        elif key == "asn":
            # Section names look like <table>.Entry<number>.
            (table, entry) = re.search(r"(.*).Entry(\d+)", sec).groups()
            if config.has_option(table, "nlu"):
                if int(entry) < config.getint(table, "nlu"):
                    merged.set(newsec, key, index)
                    continue
            else:
                merged.set(newsec, key, index)
                continue
        merged.set(newsec, key, value)


def _memory_hint(page_ptr):
    """Return a label for the memory needed by `page_ptr` pages.

    Returns None when the page count is at most 2**16 (512 MB of 8 KB
    pages), in which case no particular memory size needs requesting.
    """
    if page_ptr > (1 << 20):
        return "8G"
    elif page_ptr > (1 << 19):
        return "4G"
    elif page_ptr > (1 << 18):
        return "2G"
    elif page_ptr > (1 << 17):
        return "1G"
    elif page_ptr > (1 << 16):
        # 2**16 pages of 8 KB are 512 MB (this used to say "512KB").
        return "512M"
    return None


def aggregate(options, args):
    """Merge the checkpoints named in `args` into a single checkpoint.

    options -- parsed command-line options; only `options.prefix` is
               used, to name the output directory ../<prefix>-cpt.
    args    -- checkpoint names; their order determines the CPU index
               each checkpoint is assigned in the merged result.

    Exits with status 1 if a named checkpoint cannot be found.
    """
    merged = myCP()
    page_ptr = 0

    cpts = _find_checkpoints(args)

    dirname = "-".join([options.prefix, "cpt"])
    print(dirname)
    agg_name = "-".join(args)
    print(agg_name)
    fullpath = os.path.join("..", dirname, "cpt." + agg_name + ".10000")
    if not os.path.isdir(fullpath):
        # makedirs avoids going through a shell, so names containing
        # spaces or shell metacharacters are handled correctly.
        os.makedirs(fullpath)

    myfile = open(fullpath + "/system.physmem.physmem", "wb+")
    merged_mem = gzip.GzipFile(fileobj=myfile, mode="wb")
    try:
        max_curtick = 0
        when = 0
        for (i, cpt) in enumerate(cpts):
            config = myCP()
            cpt_file = open(cpt + "/m5.cpt")
            try:
                config.readfp(cpt_file)
            finally:
                cpt_file.close()

            for sec in config.sections():
                if "cpu" in sec:
                    _merge_cpu_section(config, merged, sec, i, page_ptr)
                elif sec == "system":
                    # Rebuilt from the accumulated page count below.
                    pass
                elif sec == "Globals":
                    # Keep the latest tick of all checkpoints, together
                    # with that checkpoint's tick-event time.
                    tick = config.getint(sec, "curTick")
                    if tick > max_curtick:
                        max_curtick = tick
                        when = config.getint("system.cpu.tickEvent", "_when")
                elif i == 0:
                    # Sections not tied to a CPU are taken from the
                    # first checkpoint only.
                    print(sec)
                    merged.add_section(sec)
                    for (key, value) in config.items(sec):
                        merged.set(sec, key, value)

            pages = int(config.get("system", "page_ptr"))
            page_ptr = page_ptr + pages

            ### memory stuff
            nbytes = pages << PAGE_SHIFT
            print("bytes to be read: %d" % nbytes)
            f = open(cpt + "/system.physmem.physmem", "rb")
            try:
                gf = gzip.GzipFile(fileobj=f, mode="rb")
                try:
                    merged_mem.write(gf.read(nbytes))
                finally:
                    gf.close()
            finally:
                f.close()

        merged.add_section("system")
        merged.set("system", "page_ptr", page_ptr)

        print("WARNING: ")
        hint = _memory_hint(page_ptr)
        if hint is None:
            print("This is a small sim, you're probably fine on memory.")
        else:
            print("Make sure the simulation using this checkpoint has "
                  "at least " + hint + " of memory.")

        merged.add_section("Globals")
        merged.set("Globals", "curTick", max_curtick)

        for i in range(len(args)):
            merged.set("system.cpu" + str(i) + ".tickEvent", "_when", when)

        out = open(fullpath + "/m5.cpt", "wb")
        try:
            merged.write(out)
        finally:
            out.close()
    finally:
        merged_mem.close()
        myfile.close()


if __name__ == "__main__":

    parser = optparse.OptionParser(
        usage="%prog [--prefix PREFIX] CHECKPOINT_NAME...")
    parser.add_option("--prefix", type="string", default="agg")

    (options, args) = parser.parse_args()
    if not args:
        parser.error("at least one checkpoint name is required")

    aggregate(options, args)