diff options
42 files changed, 1209 insertions, 1102 deletions
diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index b62372f66..af1a91a62 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -728,8 +728,10 @@ decode OPCODE default Unknown::unknown() { 0: OpcdecFault::hw_st_quad(); 1: decode HW_LDST_QUAD { format HwLoad { - 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L); - 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q); + 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, + L, IsSerializing, IsSerializeBefore); + 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, + Q, IsSerializing, IsSerializeBefore); } } } @@ -740,9 +742,9 @@ decode OPCODE default Unknown::unknown() { 1: decode HW_LDST_COND { 0: decode HW_LDST_QUAD { 0: hw_st({{ EA = (Rb + disp) & ~3; }}, - {{ Mem.ul = Ra<31:0>; }}, L); + {{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore); 1: hw_st({{ EA = (Rb + disp) & ~7; }}, - {{ Mem.uq = Ra.uq; }}, Q); + {{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore); } 1: FailUnimpl::hw_st_cond(); diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa index 556bb4bca..68b2183ad 100644 --- a/src/arch/sparc/isa/decoder.isa +++ b/src/arch/sparc/isa/decoder.isa @@ -1324,8 +1324,14 @@ decode OP default Unknown::unknown() 0x05: stb({{Mem.ub = Rd.sb;}}); 0x06: sth({{Mem.uhw = Rd.shw;}}); 0x07: sttw({{ - (Mem.tuw).a = RdLow<31:0>; - (Mem.tuw).b = RdHigh<31:0>; + //This temporary needs to be here so that the parser + //will correctly identify this instruction as a store. + //It's probably either the parenthesis or referencing + //the member variable that throws confuses it. + Twin32_t temp; + temp.a = RdLow<31:0>; + temp.b = RdHigh<31:0>; + Mem.tuw = temp; }}); } format Load { @@ -1417,8 +1423,14 @@ decode OP default Unknown::unknown() 0x15: stba({{Mem.ub = Rd;}}, {{EXT_ASI}}); 0x16: stha({{Mem.uhw = Rd;}}, {{EXT_ASI}}); 0x17: sttwa({{ - (Mem.tuw).a = RdLow<31:0>; - (Mem.tuw).b = RdHigh<31:0>; + //This temporary needs to be here so that the parser + //will correctly identify this instruction as a store. + //It's probably either the parenthesis or referencing + //the member variable that throws confuses it. + Twin32_t temp; + temp.a = RdLow<31:0>; + temp.b = RdHigh<31:0>; + Mem.tuw = temp; }}, {{EXT_ASI}}); } format LoadAlt { diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 3e0be6ad8..4dccee0d3 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -226,7 +226,8 @@ BaseCPU::startup() #endif if (params->progress_interval) { - new CPUProgressEvent(&mainEventQueue, params->progress_interval, + new CPUProgressEvent(&mainEventQueue, + cycles(params->progress_interval), this); } } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9ccdcdccc..6c6d90076 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The kind of fault this instruction has generated. */ Fault fault; - /** The memory request. */ - Request *req; - /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ Addr effAddr; + /** Is the effective virtual address valid. */ + bool effAddrValid; + /** The effective physical address. */ Addr physEffAddr; @@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether or not this instruction is ready to issue. */ bool readyToIssue() const { return status[CanIssue]; } + /** Clears this instruction being able to issue. */ + void clearCanIssue() { status.reset(CanIssue); } + /** Sets this instruction as issued from the IQ. */ void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ bool isIssued() const { return status[Issued]; } + /** Clears this instruction as being issued. */ + void clearIssued() { status.reset(Issued); } + /** Sets this instruction as executed. */ void setExecuted() { status.set(Executed); } @@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted */ bool eaCalcDone; + /** Is this instruction's memory access uncacheable. */ + bool isUncacheable; + + /** Has this instruction generated a memory request. */ + bool reqMade; + public: /** Sets the effective address. */ void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } @@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Whether or not the memory operation is done. */ bool memOpDone; + /** Is this instruction's memory access uncacheable. */ + bool uncacheable() { return isUncacheable; } + + /** Has this instruction generated a memory request. */ + bool hasRequest() { return reqMade; } + public: /** Load queue index. */ int16_t lqIdx; @@ -776,25 +794,25 @@ template<class T> inline Fault BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags) { - // Sometimes reads will get retried, so they may come through here - // twice. - if (!req) { - req = new Request(); - req->setVirt(asid, addr, sizeof(T), flags, this->PC); - req->setThreadContext(thread->readCpuId(), threadNumber); - } else { - assert(addr == req->getVaddr()); - } + reqMade = true; + Request *req = new Request(); + req->setVirt(asid, addr, sizeof(T), flags, this->PC); + req->setThreadContext(thread->readCpuId(), threadNumber); if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { + delete req; return TheISA::genAlignmentFault(); } fault = cpu->translateDataReadReq(req, thread); + if (req->isUncacheable()) + isUncacheable = true; + if (fault == NoFault) { effAddr = req->getVaddr(); + effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); @@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags) // Commit will have to clean up whatever happened. Set this // instruction as executed. this->setExecuted(); + delete req; } if (traceData) { @@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) traceData->setData(data); } - assert(req == NULL); - - req = new Request(); + reqMade = true; + Request *req = new Request(); req->setVirt(asid, addr, sizeof(T), flags, this->PC); req->setThreadContext(thread->readCpuId(), threadNumber); if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { + delete req; return TheISA::genAlignmentFault(); } fault = cpu->translateDataWriteReq(req, thread); + if (req->isUncacheable()) + isUncacheable = true; + if (fault == NoFault) { effAddr = req->getVaddr(); + effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); #if 0 @@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) #else fault = cpu->write(req, data, sqIdx); #endif - } - - if (res) { - // always return some result to keep misspeculated paths - // (which will ignore faults) deterministic - *res = (fault == NoFault) ? req->getExtraData() : 0; + } else { + delete req; } return fault; diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index c3d71e428..a1c866336 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -92,11 +92,13 @@ template <class Impl> void BaseDynInst<Impl>::initVars() { - req = NULL; memData = NULL; effAddr = 0; + effAddrValid = false; physEffAddr = 0; + isUncacheable = false; + reqMade = false; readyRegs = 0; instResult.integer = 0; @@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars() template <class Impl> BaseDynInst<Impl>::~BaseDynInst() { - if (req) { - delete req; - } - if (memData) { delete [] memData; } @@ -271,7 +269,7 @@ void BaseDynInst<Impl>::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - status.set(CanIssue); + setCanIssue(); } } diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 5a375a4b8..34754d3c5 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; Param<int> phase; Param<int> numThreads; +Param<int> cpu_id; Param<int> activity; #if FULL_SYSTEM SimObjectParam<System *> system; -Param<int> cpu_id; SimObjectParam<AlphaISA::ITB *> itb; SimObjectParam<AlphaISA::DTB *> dtb; Param<Tick> profile; @@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), #if FULL_SYSTEM INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(itb, "Instruction translation buffer"), INIT_PARAM(dtb, "Data translation buffer"), INIT_PARAM(profile, ""), @@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU) AlphaSimpleParams *params = new AlphaSimpleParams; params->clock = clock; + params->phase = phase; params->name = getInstanceName(); params->numberOfThreads = actual_num_threads; + params->cpu_id = cpu_id; params->activity = activity; #if FULL_SYSTEM params->system = system; - params->cpu_id = cpu_id; params->itb = itb; params->dtb = dtb; params->profile = profile; diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh index b91972704..304ee6c38 100644 --- a/src/cpu/o3/alpha/cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params) #endif // Give the thread the TC. this->thread[i]->tc = tc; + this->thread[i]->setCpuId(params->cpu_id); // Add the TC to the CPU's list of TC's. this->threadContexts.push_back(tc); diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 0d7d82529..e2ad23954 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -247,6 +247,11 @@ class DefaultCommit /** Handles squashing due to an TC write. */ void squashFromTC(unsigned tid); +#if FULL_SYSTEM + /** Handles processing an interrupt. */ + void handleInterrupt(); +#endif // FULL_SYSTEM + /** Commits as many instructions as possible. */ void commitInsts(); @@ -409,6 +414,16 @@ class DefaultCommit /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; + /** Records if there is a trap currently in flight. */ + bool trapInFlight[Impl::MaxThreads]; + + /** Records if there were any stores committed this cycle. */ + bool committedStores[Impl::MaxThreads]; + + /** Records if commit should check if the ROB is truly empty (see + commit_impl.hh). */ + bool checkEmptyROB[Impl::MaxThreads]; + /** Pointer to the list of active threads. */ std::list<unsigned> *activeThreads; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 18fb2aaa3..3fd85595f 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) for (int i=0; i < numThreads; i++) { commitStatus[i] = Idle; changedROBNumEntries[i] = false; + checkEmptyROB[i] = false; + trapInFlight[i] = false; + committedStores[i] = false; trapSquash[i] = false; tcSquash[i] = false; PC[i] = nextPC[i] = nextNPC[i] = 0; @@ -335,6 +338,7 @@ DefaultCommit<Impl>::initStage() for (int i=0; i < numThreads; i++) { toIEW->commitInfo[i].usedROB = true; toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i); + toIEW->commitInfo[i].emptyROB = true; } cpu->activityThisCycle(); @@ -473,14 +477,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid) TrapEvent *trap = new TrapEvent(this, tid); trap->schedule(curTick + trapLatency); - - thread[tid]->trapPending = true; + trapInFlight[tid] = true; } template <class Impl> void DefaultCommit<Impl>::generateTCEvent(unsigned tid) { + assert(!trapInFlight[tid]); DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid); tcSquash[tid] = true; @@ -495,7 +499,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid) // Hopefully this doesn't mess things up. Basically I want to squash // all instructions of this thread. InstSeqNum squashed_inst = rob->isEmpty() ? - 0 : rob->readHeadInst(tid)->seqNum - 1;; + 0 : rob->readHeadInst(tid)->seqNum - 1; // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB (0 in this case. @@ -532,6 +536,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) thread[tid]->trapPending = false; thread[tid]->inSyscall = false; + trapInFlight[tid] = false; trapSquash[tid] = false; @@ -580,6 +585,10 @@ DefaultCommit<Impl>::tick() while (threads != end) { unsigned tid = *threads++; + // Clear the bit saying if the thread has committed stores + // this cycle. + committedStores[tid] = false; + if (commitStatus[tid] == ROBSquashing) { if (rob->isDoneSquashing(tid)) { @@ -635,16 +644,11 @@ DefaultCommit<Impl>::tick() updateStatus(); } +#if FULL_SYSTEM template <class Impl> void -DefaultCommit<Impl>::commit() +DefaultCommit<Impl>::handleInterrupt() { - - ////////////////////////////////////// - // Check for interrupts - ////////////////////////////////////// - -#if FULL_SYSTEM if (interrupt != NoFault) { // Wait until the ROB is empty and all stores have drained in // order to enter the interrupt. @@ -653,6 +657,12 @@ DefaultCommit<Impl>::commit() // an interrupt needed to be handled. DPRINTF(Commit, "Interrupt detected.\n"); + Fault new_interrupt = cpu->getInterrupts(); + assert(new_interrupt != NoFault); + + // Clear the interrupt now that it's going to be handled + toIEW->commitInfo[0].clearInterrupt = true; + assert(!thread[0]->inSyscall); thread[0]->inSyscall = true; @@ -666,16 +676,14 @@ DefaultCommit<Impl>::commit() // Generate trap squash event. generateTrapEvent(0); - // Clear the interrupt now that it's been handled - toIEW->commitInfo[0].clearInterrupt = true; interrupt = NoFault; } else { DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n"); } - } else if (cpu->check_interrupts(cpu->tcBase(0)) && - commitStatus[0] != TrapPending && - !trapSquash[0] && - !tcSquash[0]) { + } else if (commitStatus[0] != TrapPending && + cpu->check_interrupts(cpu->tcBase(0)) && + !trapSquash[0] && + !tcSquash[0]) { // Process interrupts if interrupts are enabled, not in PAL // mode, and no other traps or external squashes are currently // pending. @@ -691,7 +699,21 @@ DefaultCommit<Impl>::commit() toIEW->commitInfo[0].interruptPending = true; } } +} +#endif // FULL_SYSTEM + +template <class Impl> +void +DefaultCommit<Impl>::commit() +{ +#if FULL_SYSTEM + // Check for any interrupt, and start processing it. Or if we + // have an outstanding interrupt and are at a point when it is + // valid to take an interrupt, process it. + if (cpu->check_interrupts(cpu->tcBase(0))) { + handleInterrupt(); + } #endif // FULL_SYSTEM //////////////////////////////////// @@ -709,6 +731,7 @@ DefaultCommit<Impl>::commit() assert(!tcSquash[tid]); squashFromTrap(tid); } else if (tcSquash[tid] == true) { + assert(commitStatus[tid] != TrapPending); squashFromTC(tid); } @@ -753,6 +776,7 @@ DefaultCommit<Impl>::commit() bdelay_done_seq_num--; #endif } + // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; @@ -817,13 +841,29 @@ DefaultCommit<Impl>::commit() toIEW->commitInfo[tid].usedROB = true; toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); - if (rob->isEmpty(tid)) { - toIEW->commitInfo[tid].emptyROB = true; - } - wroteToTimeBuffer = true; changedROBNumEntries[tid] = false; + if (rob->isEmpty(tid)) + checkEmptyROB[tid] = true; } + + // ROB is only considered "empty" for previous stages if: a) + // ROB is empty, b) there are no outstanding stores, c) IEW + // stage has received any information regarding stores that + // committed. + // c) is checked by making sure to not consider the ROB empty + // on the same cycle as when stores have been committed. + // @todo: Make this handle multi-cycle communication between + // commit and IEW. + if (checkEmptyROB[tid] && rob->isEmpty(tid) && + !iewStage->hasStoresToWB() && !committedStores[tid]) { + checkEmptyROB[tid] = false; + toIEW->commitInfo[tid].usedROB = true; + toIEW->commitInfo[tid].emptyROB = true; + toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); + wroteToTimeBuffer = true; + } + } } @@ -966,8 +1006,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->setAtCommit(); - if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || head_inst->isMemBarrier() || @@ -977,19 +1015,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) "instruction [sn:%lli] at the head of the ROB, PC %#x.\n", head_inst->seqNum, head_inst->readPC()); - // Hack to make sure syscalls/memory barriers/quiesces - // aren't executed until all stores write back their data. - // This direct communication shouldn't be used for - // anything other than this. - if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() || - head_inst->isQuiesce()) && - iewStage->hasStoresToWB()) - { + if (inst_num > 0 || iewStage->hasStoresToWB()) { DPRINTF(Commit, "Waiting for all stores to writeback.\n"); return false; - } else if (inst_num > 0 || iewStage->hasStoresToWB()) { - DPRINTF(Commit, "Waiting to become head of commit.\n"); - return false; } toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; @@ -1002,6 +1030,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } else if (head_inst->isLoad()) { + if (inst_num > 0 || iewStage->hasStoresToWB()) { + DPRINTF(Commit, "Waiting for all stores to writeback.\n"); + return false; + } + + assert(head_inst->uncacheable()); DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n", head_inst->seqNum, head_inst->readPC()); @@ -1025,8 +1059,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) panic("Thread sync instructions are not handled yet.\n"); } + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = head_inst->getFault(); + // Stores mark themselves as completed. - if (!head_inst->isStore()) { + if (!head_inst->isStore() && inst_fault == NoFault) { head_inst->setCompleted(); } @@ -1038,9 +1075,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) } #endif - // Check if the instruction caused a fault. If so, trap. - Fault inst_fault = head_inst->getFault(); - // DTB will sometimes need the machine instruction for when // faults happen. So we will set it here, prior to the DTB // possibly needing it for its fault. @@ -1048,7 +1082,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); if (inst_fault != NoFault) { - head_inst->setCompleted(); DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1057,6 +1090,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } + head_inst->setCompleted(); + #if USE_CHECKER if (cpu->checker && head_inst->isStore()) { cpu->checker->verify(head_inst); @@ -1082,6 +1117,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) commitStatus[tid] = TrapPending; + if (head_inst->traceData) { + head_inst->traceData->setFetchSeq(head_inst->seqNum); + head_inst->traceData->setCPSeq(thread[tid]->numInst); + head_inst->traceData->dump(); + delete head_inst->traceData; + head_inst->traceData = NULL; + } + // Generate trap squash event. generateTrapEvent(tid); // warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); @@ -1123,6 +1166,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Finally clear the head ROB entry. rob->retireHead(tid); + // If this was a store, record it for this cycle. + if (head_inst->isStore()) + committedStores[tid] = true; + // Return true to indicate that we have committed an instruction. return true; } @@ -1167,7 +1214,8 @@ DefaultCommit<Impl>::getInsts() int tid = inst->threadNumber; if (!inst->isSquashed() && - commitStatus[tid] != ROBSquashing) { + commitStatus[tid] != ROBSquashing && + commitStatus[tid] != TrapPending) { changedROBNumEntries[tid] = true; DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n", diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 38e6a0b5b..354e3c490 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick() lastRunningCycle = curTick; timesIdled++; } else { - tickEvent.schedule(curTick + cycles(1)); + tickEvent.schedule(nextCycle(curTick + cycles(1))); DPRINTF(O3CPU, "Scheduling next tick!\n"); } } @@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume() #endif if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); _status = Running; } @@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) ThreadContext *tc = threadContexts[i]; if (tc->status() == ThreadContext::Active && _status != Running) { _status = Running; - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } } if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } template <class Impl> @@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU() idleCycles += (curTick - 1) - lastRunningCycle; - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ea374dd57..0ab20ba2a 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - tickEvent.reschedule(curTick + cycles(delay)); + tickEvent.reschedule(nextCycle(curTick + cycles(delay))); else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + cycles(delay)); + tickEvent.schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule tick event, regardless of its current state. */ @@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU { // Schedule thread to activate, regardless of its current state. if (activateThreadEvent[tid].squashed()) - activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + activateThreadEvent[tid]. + reschedule(nextCycle(curTick + cycles(delay))); else if (!activateThreadEvent[tid].scheduled()) - activateThreadEvent[tid].schedule(curTick + cycles(delay)); + activateThreadEvent[tid]. + schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule actiavte thread event, regardless of its current state. */ @@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) - deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + deallocateContextEvent[tid]. + reschedule(nextCycle(curTick + cycles(delay))); else if (!deallocateContextEvent[tid].scheduled()) - deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + deallocateContextEvent[tid]. + schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule thread deallocation in CPU */ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 1256dd233..663cd3142 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -620,6 +620,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid fault = TheISA::genMachineCheckFault(); delete mem_req; memReq[tid] = NULL; + warn("Bad address!\n"); } assert(retryPkt == NULL); assert(retryTid == -1); @@ -670,11 +671,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, // Get rid of the retrying packet if it was from this thread. if (retryTid == tid) { assert(cacheBlocked); - cacheBlocked = false; - retryTid = -1; - delete retryPkt->req; - delete retryPkt; + if (retryPkt) { + delete retryPkt->req; + delete retryPkt; + } retryPkt = NULL; + retryTid = -1; } fetchStatus[tid] = Squashing; @@ -1150,7 +1152,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ///FIXME This needs to be more robust in dealing with delay slots #if !ISA_HAS_DELAY_SLOT - predicted_branch |= +// predicted_branch |= #endif lookupAndUpdateNextPC(instruction, next_PC, next_NPC); predicted_branch |= (next_PC != fetch_NPC); @@ -1221,7 +1223,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) // until commit handles the fault. The only other way it can // wake up is if a squash comes along and changes the PC. #if FULL_SYSTEM - assert(numInst != fetchWidth); + assert(numInst < fetchWidth); // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); // We will use a nop in order to carry the fault. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index f24eaf2c4..4883e5a5c 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1153,19 +1153,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) inst->setCanCommit(); instQueue.insertBarrier(inst); add_to_iq = false; - } else if (inst->isNonSpeculative()) { - DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " - "encountered, skipping.\n", tid); - - // Same as non-speculative stores. - inst->setCanCommit(); - - // Specifically insert it as nonspeculative. - instQueue.insertNonSpec(inst); - - ++iewDispNonSpecInsts; - - add_to_iq = false; } else if (inst->isNop()) { DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, " "skipping.\n", tid); @@ -1193,6 +1180,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) } else { add_to_iq = true; } + if (inst->isNonSpeculative()) { + DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " + "encountered, skipping.\n", tid); + + // Same as non-speculative stores. + inst->setCanCommit(); + + // Specifically insert it as nonspeculative. + instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + add_to_iq = false; + } // If the instruction queue is not full, then add the // instruction. @@ -1379,6 +1380,7 @@ DefaultIEW<Impl>::executeInsts() predictedNotTakenIncorrect++; } } else if (ldstQueue.violation(tid)) { + assert(inst->isMemRef()); // If there was an ordering violation, then get the // DynInst that caused the violation. Note that this // clears the violation signal. @@ -1391,10 +1393,10 @@ DefaultIEW<Impl>::executeInsts() // Ensure the violating instruction is older than // current squash - if (fetchRedirect[tid] && - violator->seqNum >= toCommit->squashedSeqNum[tid]) +/* if (fetchRedirect[tid] && + violator->seqNum >= toCommit->squashedSeqNum[tid] + 1) continue; - +*/ fetchRedirect[tid] = true; // Tell the instruction queue that a violation has occured. @@ -1414,6 +1416,33 @@ DefaultIEW<Impl>::executeInsts() squashDueToMemBlocked(inst, tid); } + } else { + // Reset any state associated with redirects that will not + // be used. + if (ldstQueue.violation(tid)) { + assert(inst->isMemRef()); + + DynInstPtr violator = ldstQueue.getMemDepViolator(tid); + + DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + DPRINTF(IEW, "Violation will not be handled because " + "already squashing\n"); + + ++memOrderViolationEvents; + } + if (ldstQueue.loadBlocked(tid) && + !ldstQueue.isLoadBlockedHandled(tid)) { + DPRINTF(IEW, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + DPRINTF(IEW, "Blocked load will not be handled because " + "already squashing\n"); + + ldstQueue.setLoadBlockedHandled(tid); + } + } } @@ -1563,6 +1592,7 @@ DefaultIEW<Impl>::tick() //DPRINTF(IEW,"NonspecInst from thread %i",tid); if (fromCommit->commitInfo[tid].uncached) { instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad); + fromCommit->commitInfo[tid].uncachedLoad->setAtCommit(); } else { instQueue.scheduleNonSpec( fromCommit->commitInfo[tid].nonSpecSeqNum); diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d5781d89d..79e03d4bf 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -829,6 +829,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) unsigned tid = (*inst_it).second->threadNumber; + (*inst_it).second->setAtCommit(); + (*inst_it).second->setCanIssue(); if (!(*inst_it).second->isMemRef()) { @@ -960,6 +962,8 @@ template <class Impl> void InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst) { + DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); + resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); } @@ -984,7 +988,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst) completed_inst->memOpDone = true; memDepUnit[tid].completed(completed_inst); - count[tid]--; } @@ -1084,16 +1087,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid) ++iqSquashedOperandsExamined; } - } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) { + } else if (!squashed_inst->isStoreConditional() || + !squashed_inst->isCompleted()) { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); assert(ns_inst_it != nonSpecInsts.end()); + if (ns_inst_it == nonSpecInsts.end()) { + assert(squashed_inst->getFault() != NoFault); + } else { - (*ns_inst_it).second = NULL; + (*ns_inst_it).second = NULL; - nonSpecInsts.erase(ns_inst_it); + nonSpecInsts.erase(ns_inst_it); - ++iqSquashedNonSpecRemoved; + ++iqSquashedNonSpecRemoved; + } } // Might want to also clear out the head of the dependency graph. diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 2419afe29..1b10843f5 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; + + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + delete req; return TheISA::genMachineCheckFault(); } @@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) if (store_size == 0) continue; + else if (storeQueue[store_idx].inst->uncacheable()) + continue; + + assert(storeQueue[store_idx].inst->effAddrValid); // Check if the store data is within the lower and upper bounds of // addresses that the request needs. @@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) storeQueue[store_idx].inst->effAddr; // If the store's data has all of the data needed, we can forward. - if (store_has_lower_limit && store_has_upper_limit) { + if ((store_has_lower_limit && store_has_upper_limit)) { // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); // @todo: Magic number, assumes byte addressing @@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // If it's already been written back, then don't worry about // stalling on it. if (storeQueue[store_idx].completed) { + panic("Should not check one of these"); continue; } @@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // rescheduled eventually iewStage->rescheduleMemInst(load_inst); iewStage->decrWb(load_inst->seqNum); + load_inst->clearIssued(); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not @@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); - ++lsqBlockedLoads; + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the + // proper place to really handle request deletes. + delete req; + return NoFault; } } @@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Delete state and data packet because a load retry // initiates a pipeline restart; it does not retry. delete state; + delete data_pkt->req; delete data_pkt; + req = NULL; + if (result == Packet::BadAddress) { return TheISA::genMachineCheckFault(); } @@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // If the cache was blocked, or has become blocked due to the access, // handle it. if (lsq->cacheBlocked()) { + if (req) + delete req; + ++lsqCacheBlocked; iewStage->decrWb(load_inst->seqNum); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 3ba22a530..e70c960b3 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) if (isSwitchedOut() || inst->isSquashed()) { iewStage->decrWb(inst->seqNum); delete state; + delete pkt->req; delete pkt; return; } else { @@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } delete state; + delete pkt->req; delete pkt; } @@ -403,12 +405,15 @@ template <class Impl> Fault LSQUnit<Impl>::executeLoad(DynInstPtr &inst) { + using namespace TheISA; // Execute a specific load. Fault load_fault = NoFault; DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", inst->readPC(),inst->seqNum); + assert(!inst->isSquashed()); + load_fault = inst->initiateAcc(); // If the instruction faulted, then we need to send it along to commit @@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) // realizes there is activity. // Mark it as executed unless it is an uncached load that // needs to hit the head of commit. - if (!(inst->req && inst->req->isUncacheable()) || + if (!(inst->hasRequest() && inst->uncacheable()) || inst->isAtCommit()) { inst->setExecuted(); } iewStage->instToCommit(inst); iewStage->activityThisCycle(); + } else if (!loadBlocked()) { + assert(inst->effAddrValid); + int load_idx = inst->lqIdx; + incrLdIdx(load_idx); + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (inst->effAddr >> 8)) { + // A load incorrectly passed this load. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; + } + + ++lsqMemOrderViolation; + + return genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } } return load_fault; @@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", store_inst->readPC(), store_inst->seqNum); + assert(!store_inst->isSquashed()); + // Check the recently completed loads to see if any match this store's // address. If so, then we have a memory ordering violation. int load_idx = store_inst->lqIdx; @@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) ++storesToWB; } - if (!memDepViolator) { - while (load_idx != loadTail) { - // Really only need to check loads that have actually executed - // It's safe to check all loads because effAddr is set to - // InvalAddr when the dyn inst is created. - - // @todo: For now this is extra conservative, detecting a - // violation if the addresses match assuming all accesses - // are quad word accesses. - - // @todo: Fix this, magic number being used here - if ((loadQueue[load_idx]->effAddr >> 8) == - (store_inst->effAddr >> 8)) { - // A load incorrectly passed this store. Squash and refetch. - // For now return a fault to show that it was unsuccessful. - memDepViolator = loadQueue[load_idx]; - ++lsqMemOrderViolation; - - return genMachineCheckFault(); + assert(store_inst->effAddrValid); + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; } - incrLdIdx(load_idx); + ++lsqMemOrderViolation; + + return genMachineCheckFault(); } - // If we've reached this point, there was no violation. - memDepViolator = NULL; + incrLdIdx(load_idx); } return store_fault; @@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores() panic("LSQ sent out a bad address for a completed store!"); } // Need to handle becoming blocked on a store. - DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" "retry later\n", inst->seqNum); isStoreBlocked = true; @@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } } + if (memDepViolator && squashed_num < memDepViolator->seqNum) { + memDepViolator = NULL; + } + int store_idx = storeTail; decrStIdx(store_idx); @@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + delete storeQueue[store_idx].req; + storeQueue[store_idx].req = NULL; --stores; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index f19980fd5..64558efaa 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) inst_entry->regsReady = true; } + // Clear the bit saying this instruction can issue. + inst->clearCanIssue(); + // Add this instruction to the list of dependents. store_entry->dependInsts.push_back(inst_entry); @@ -357,7 +360,6 @@ void MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst) { DynInstPtr temp_inst; - bool found_inst = false; // For now this replay function replays all waiting memory ops. while (!instsToReplay.empty()) { @@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst) moveToReady(inst_entry); - if (temp_inst == inst) { - found_inst = true; - } - instsToReplay.pop_front(); } - - assert(found_inst); } template <class MemDepPred, class Impl> diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 620daf691..b436ec1c3 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg) // known that the prev reg was outside the range of normal registers // so the free list can avoid adding it. prev_reg = renamed_reg; - - assert(renamed_reg < numPhysicalRegs + numMiscRegs); } DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n", diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 4988df3c5..6e6ba2380 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -171,8 +171,12 @@ Bus::recvTiming(PacketPtr pkt) } short dest = pkt->getDest(); + + // Make sure to clear the snoop commit flag so it doesn't think an + // access has been handled twice. if (dest == Packet::Broadcast) { port = findPort(pkt->getAddr(), pkt->getSrc()); + pkt->flags &= ~SNOOP_COMMIT; if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) { bool success; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 5c6ab0950..fc4660269 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -545,8 +545,13 @@ Cache<TagStore,Coherence>::access(PacketPtr &pkt) //We are determining prefetches on access stream, call prefetcher prefetcher->handleMiss(pkt, curTick); } + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + if (!pkt->req->isUncacheable()) { - blk = handleAccess(pkt, lat, writebacks); + if (!missQueue->findMSHR(blk_addr)) { + blk = handleAccess(pkt, lat, writebacks); + } } else { size = pkt->getSize(); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 25b8fcbeb..24ca9cfa2 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -599,6 +599,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time) MemCmd cmd = mshr->getTarget()->cmd; mshr->pkt->setDest(Packet::Broadcast); mshr->pkt->result = Packet::Unknown; + mshr->pkt->req = mshr->getTarget()->req; mq.markPending(mshr, cmd); mshr->order = order++; cache->setMasterRequest(Request_MSHR, time); diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py index a66cd436e..5600d9f22 100644 --- a/tests/configs/o3-timing.py +++ b/tests/configs/o3-timing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2006 The Regents of The University of Michigan +# Copyright (c) 2006-2007 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -37,7 +37,7 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 -cpu = DerivO3CPU() +cpu = DerivO3CPU(cpu_id=0) cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), MyCache(size = '2MB')) diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini new file mode 100644 index 000000000..73a28200e --- /dev/null +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini @@ -0,0 +1,64 @@ +[root] +type=Root +children=system +dummy=0 + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=AtomicSimpleCPU +children=workload +clock=1 +cpu_id=0 +defer_registration=false +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +phase=0 +progress_interval=0 +simulate_stalls=false +system=system +width=1 +workload=system.cpu.workload +dcache_port=system.membus.port[2] +icache_port=system.membus.port[1] + +[system.cpu.workload] +type=LiveProcess +cmd=mcf mcf.in +cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic +egid=100 +env= +euid=100 +executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf +gid=100 +input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +zero=false +port=system.membus.port[0] + diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out new file mode 100644 index 000000000..2b86e6bfb --- /dev/null +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out @@ -0,0 +1,57 @@ +[root] +type=Root +dummy=0 + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 +zero=false + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.workload] +type=LiveProcess +cmd=mcf mcf.in +executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf +input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in +output=cout +env= +cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu] +type=AtomicSimpleCPU +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +system=system +cpu_id=0 +workload=system.cpu.workload +clock=1 +phase=0 +defer_registration=false +width=1 +function_trace=false +function_trace_start=0 +simulate_stalls=false + diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt new file mode 100644 index 000000000..41e6bfc52 --- /dev/null +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt @@ -0,0 +1,18 @@ + +---------- Begin Simulation Statistics ---------- +host_inst_rate 624449 # Simulator instruction rate (inst/s) +host_mem_usage 148644 # Number of bytes of host memory used +host_seconds 2753.78 # Real time elapsed on the host +host_tick_rate 624449 # Simulator tick rate (ticks/s) +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 1719594534 # Number of instructions simulated +sim_seconds 0.001720 # Number of seconds simulated +sim_ticks 1719594533 # Number of ticks simulated +system.cpu.idle_fraction 0 # Percentage of idle cycles +system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles +system.cpu.numCycles 1719594534 # number of cpu cycles simulated +system.cpu.num_insts 1719594534 # Number of instructions executed +system.cpu.num_refs 774793634 # Number of memory references +system.cpu.workload.PROG:num_syscalls 632 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr new file mode 100644 index 000000000..9c09fd847 --- /dev/null +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr @@ -0,0 +1,7 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0xa2000 length 0x10. +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +0: system.remote_gdb.listener: listening for remote gdb on port 7000 +warn: Entering event queue @ 0. Starting simulation... +warn: Ignoring request to flush register windows. diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout new file mode 100644 index 000000000..6711761e8 --- /dev/null +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout @@ -0,0 +1,33 @@ + +MCF SPEC version 1.6.I +by Andreas Loebel +Copyright (c) 1998,1999 ZIB Berlin +All Rights Reserved. + +nodes : 1800 +active arcs : 8190 +simplex iterations : 6837 +flow value : 12860044181 +new implicit arcs : 300000 +active arcs : 308190 +simplex iterations : 11843 +flow value : 9360043604 +new implicit arcs : 22787 +active arcs : 330977 +simplex iterations : 11931 +flow value : 9360043512 +checksum : 798014 +optimal +M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Mar 23 2007 22:37:06 +M5 started Fri Mar 23 22:37:22 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic tests/run.py long/10.mcf/sparc/linux/simple-atomic +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1719594533 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini index 2296e2545..cc4477d68 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini @@ -1,48 +1,7 @@ [root] type=Root children=system -checkpoint= -clock=1000000000000 -max_tick=0 -output_file=cout -progress_interval=0 - -[exetrace] -intel_format=false -legion_lockstep=false -pc_symbol=true -print_cpseq=false -print_cycle=true -print_data=true -print_effaddr=true -print_fetchseq=false -print_iregs=false -print_opclass=true -print_thread=true -speculative=true -trace_system=client - -[serialize] -count=10 -cycle=0 -dir=cpt.%012d -period=0 - -[stats] -descriptions=true -dump_cycle=0 -dump_period=0 -dump_reset=false -ignore_events= -mysql_db= -mysql_host= -mysql_password= -mysql_user= -project_name=test -simulation_name=test -simulation_sample=0 -text_compat=true -text_file=m5stats.txt +dummy=0 [system] type=System @@ -70,6 +29,7 @@ commitToFetchDelay=1 commitToIEWDelay=1 commitToRenameDelay=1 commitWidth=8 +cpu_id=0 decodeToFetchDelay=1 decodeToRenameDelay=1 decodeWidth=8 @@ -417,12 +377,3 @@ range=0:134217727 zero=false port=system.membus.port[0] -[trace] -bufsize=0 -cycle=0 -dump_on_exit=false -file=cout -flags= -ignore= -start=0 - diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out index 1b1b58f1b..f50559125 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out @@ -1,9 +1,6 @@ [root] type=Root -clock=1000000000000 -max_tick=0 -progress_interval=0 -output_file=cout +dummy=0 [system.physmem] type=PhysicalMemory @@ -173,6 +170,7 @@ type=DerivO3CPU clock=1 phase=0 numThreads=1 +cpu_id=0 activity=0 workload=system.cpu.workload checker=null @@ -367,51 +365,3 @@ clock=1000 width=64 responder_set=false -[trace] -flags= -start=0 -cycle=0 -bufsize=0 -file=cout -dump_on_exit=false -ignore= - -[stats] -descriptions=true -project_name=test -simulation_name=test -simulation_sample=0 -text_file=m5stats.txt -text_compat=true -mysql_db= -mysql_user= -mysql_password= -mysql_host= -events_start=-1 -dump_reset=false -dump_cycle=0 -dump_period=0 -ignore_events= - -[random] -seed=1 - -[exetrace] -speculative=true -print_cycle=true -print_opclass=true -print_thread=true -print_effaddr=true -print_data=true -print_iregs=false -print_fetchseq=false -print_cpseq=false -print_reg_delta=false -pc_symbol=true -intel_format=false -legion_lockstep=false -trace_system=client - -[statsreset] -reset_cycle=0 - diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt index 4e3fdbcd2..4b323618c 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt @@ -1,40 +1,40 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 675 # Number of BTB hits -global.BPredUnit.BTBLookups 2343 # Number of BTB lookups +global.BPredUnit.BTBHits 669 # Number of BTB hits +global.BPredUnit.BTBLookups 2338 # Number of BTB lookups global.BPredUnit.RASInCorrect 76 # Number of incorrect RAS predictions. global.BPredUnit.condIncorrect 437 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 1563 # Number of conditional branches predicted -global.BPredUnit.lookups 5229 # Number of BP lookups +global.BPredUnit.condPredicted 1559 # Number of conditional branches predicted +global.BPredUnit.lookups 5224 # Number of BP lookups global.BPredUnit.usedRAS 2821 # Number of times the RAS was used to get a target. -host_inst_rate 11609 # Simulator instruction rate (inst/s) -host_mem_usage 177052 # Number of bytes of host memory used -host_seconds 0.48 # Real time elapsed on the host -host_tick_rate 2887871 # Simulator tick rate (ticks/s) -memdepunit.memDep.conflictingLoads 23 # Number of conflicting loads. -memdepunit.memDep.conflictingStores 117 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 3775 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 3734 # Number of stores inserted to the mem dependence unit. +host_inst_rate 12539 # Simulator instruction rate (inst/s) +host_mem_usage 156028 # Number of bytes of host memory used +host_seconds 0.45 # Real time elapsed on the host +host_tick_rate 3120138 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 24 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 12 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 3770 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 3723 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5623 # Number of instructions simulated sim_seconds 0.000001 # Number of seconds simulated -sim_ticks 1400135 # Number of ticks simulated +sim_ticks 1400134 # Number of ticks simulated system.cpu.commit.COM:branches 862 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 97 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 101 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 51243 +system.cpu.commit.COM:committed_per_cycle.samples 52214 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 48519 9468.42% - 1 1590 310.29% - 2 483 94.26% - 3 227 44.30% - 4 131 25.56% - 5 104 20.30% - 6 61 11.90% - 7 31 6.05% - 8 97 18.93% + 0 49499 9480.02% + 1 1576 301.83% + 2 483 92.50% + 3 233 44.62% + 4 133 25.47% + 5 102 19.53% + 6 60 11.49% + 7 27 5.17% + 8 101 19.34% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -46,66 +46,66 @@ system.cpu.commit.COM:swp_count 0 # Nu system.cpu.commit.branchMispredicts 368 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 5640 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 17 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 13830 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 13804 # The number of squashed insts skipped by commit system.cpu.committedInsts 5623 # Number of Instructions Simulated system.cpu.committedInsts_total 5623 # Number of Instructions Simulated -system.cpu.cpi 249.001423 # CPI: Cycles Per Instruction -system.cpu.cpi_total 249.001423 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1600 # number of ReadReq accesses(hits+misses) +system.cpu.cpi 249.001245 # CPI: Cycles Per Instruction +system.cpu.cpi_total 249.001245 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1596 # number of ReadReq accesses(hits+misses) system.cpu.dcache.ReadReq_avg_miss_latency 6986.684848 # average ReadReq miss latency system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6882.626263 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1435 # number of ReadReq hits +system.cpu.dcache.ReadReq_hits 1431 # number of ReadReq hits system.cpu.dcache.ReadReq_miss_latency 1152803 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.103125 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_miss_rate 0.103383 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 165 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 66 # number of ReadReq MSHR hits system.cpu.dcache.ReadReq_mshr_miss_latency 681380 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.061875 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_rate 0.062030 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 99 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 5293.047244 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5141.082192 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 5293.200787 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5141.095890 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 558 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 1344434 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 1344473 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.312808 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 254 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 181 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 375299 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 375300 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 73 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked -system.cpu.dcache.avg_blocked_cycles_no_targets 3366.651163 # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 11.587209 # Average number of references to valid blocks. +system.cpu.dcache.avg_blocked_cycles_no_targets 3366.930233 # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 11.563953 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 43 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked -system.cpu.dcache.blocked_cycles_no_targets 144766 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 144778 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2412 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 5959.992840 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 6143.482558 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1993 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 2497237 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.173715 # miss rate for demand accesses +system.cpu.dcache.demand_accesses 2408 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 5960.085919 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 6143.488372 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1989 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 2497276 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.174003 # miss rate for demand accesses system.cpu.dcache.demand_misses 419 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 247 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 1056679 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.071310 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_latency 1056680 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.071429 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 172 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2412 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 5959.992840 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 6143.482558 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2408 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 5960.085919 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 6143.488372 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1993 # number of overall hits -system.cpu.dcache.overall_miss_latency 2497237 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.173715 # miss rate for overall accesses +system.cpu.dcache.overall_hits 1989 # number of overall hits +system.cpu.dcache.overall_miss_latency 2497276 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.174003 # miss rate for overall accesses system.cpu.dcache.overall_misses 419 # number of overall misses system.cpu.dcache.overall_mshr_hits 247 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 1056679 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.071310 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_latency 1056680 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.071429 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 172 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -121,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 172 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 101.349720 # Cycle average of tags in use -system.cpu.dcache.total_refs 1993 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 101.349670 # Cycle average of tags in use +system.cpu.dcache.total_refs 1989 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.decode.DECODE:BlockedCycles 17501 # Number of cycles decode is blocked system.cpu.decode.DECODE:BranchMispred 70 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 168 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 29666 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 28130 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 5553 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 2529 # Number of cycles decode is squashing +system.cpu.decode.DECODE:BranchResolved 167 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 29609 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 29114 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 5540 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 2527 # Number of cycles decode is squashing system.cpu.decode.DECODE:SquashedInsts 200 # Number of squashed instructions handled by decode system.cpu.decode.DECODE:UnblockCycles 60 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 5229 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 6371 # Number of cache lines fetched -system.cpu.fetch.Cycles 13322 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 296 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 35572 # Number of instructions fetch has processed +system.cpu.fetch.Branches 5224 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 6367 # Number of cache lines fetched +system.cpu.fetch.Cycles 13308 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 295 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 35526 # Number of instructions fetch has processed system.cpu.fetch.SquashCycles 2057 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.097242 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 6371 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 3496 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 0.661522 # Number of inst fetches per cycle +system.cpu.fetch.branchRate 0.095429 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 7360 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 3490 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 0.648972 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 53773 +system.cpu.fetch.rateDist.samples 54742 system.cpu.fetch.rateDist.min_value 0 - 0 46825 8707.90% - 1 199 37.01% - 2 504 93.73% - 3 1429 265.75% - 4 1462 271.88% - 5 245 45.56% - 6 322 59.88% - 7 1223 227.44% - 8 1564 290.85% + 0 47805 8732.78% + 1 199 36.35% + 2 500 91.34% + 3 1426 260.49% + 4 1459 266.52% + 5 244 44.57% + 6 327 59.73% + 7 1225 223.78% + 8 1557 284.43% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 6370 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5088.614350 # average ReadReq miss latency +system.cpu.icache.ReadReq_accesses 6366 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5085.923937 # average ReadReq miss latency system.cpu.icache.ReadReq_avg_mshr_miss_latency 4278.032258 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 5924 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 2269522 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.070016 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 446 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 136 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_hits 5919 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 2273408 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.070217 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 447 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 137 # number of ReadReq MSHR hits system.cpu.icache.ReadReq_mshr_miss_latency 1326190 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.048666 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_miss_rate 0.048696 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 310 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked -system.cpu.icache.avg_blocked_cycles_no_targets 3444.375000 # average number of cycles each access was blocked -system.cpu.icache.avg_refs 19.109677 # Average number of references to valid blocks. +system.cpu.icache.avg_blocked_cycles_no_targets 3443.500000 # average number of cycles each access was blocked +system.cpu.icache.avg_refs 19.093548 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 8 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked_cycles_no_targets 27555 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 27548 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 6370 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5088.614350 # average overall miss latency +system.cpu.icache.demand_accesses 6366 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5085.923937 # average overall miss latency system.cpu.icache.demand_avg_mshr_miss_latency 4278.032258 # average overall mshr miss latency -system.cpu.icache.demand_hits 5924 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 2269522 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.070016 # miss rate for demand accesses -system.cpu.icache.demand_misses 446 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 136 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_hits 5919 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 2273408 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.070217 # miss rate for demand accesses +system.cpu.icache.demand_misses 447 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 137 # number of demand (read+write) MSHR hits system.cpu.icache.demand_mshr_miss_latency 1326190 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.048666 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_rate 0.048696 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 310 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 6370 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5088.614350 # average overall miss latency +system.cpu.icache.overall_accesses 6366 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5085.923937 # average overall miss latency system.cpu.icache.overall_avg_mshr_miss_latency 4278.032258 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 5924 # number of overall hits -system.cpu.icache.overall_miss_latency 2269522 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.070016 # miss rate for overall accesses -system.cpu.icache.overall_misses 446 # number of overall misses -system.cpu.icache.overall_mshr_hits 136 # number of overall MSHR hits +system.cpu.icache.overall_hits 5919 # number of overall hits +system.cpu.icache.overall_miss_latency 2273408 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.070217 # miss rate for overall accesses +system.cpu.icache.overall_misses 447 # number of overall misses +system.cpu.icache.overall_mshr_hits 137 # number of overall MSHR hits system.cpu.icache.overall_mshr_miss_latency 1326190 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.048666 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate 0.048696 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 310 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -218,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 310 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 147.070827 # Cycle average of tags in use -system.cpu.icache.total_refs 5924 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 147.070711 # Cycle average of tags in use +system.cpu.icache.total_refs 5919 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 1346363 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 2364 # Number of branches executed +system.cpu.idleCycles 1345393 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 2362 # Number of branches executed system.cpu.iew.EXEC:nop 48 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.251650 # Inst execution rate -system.cpu.iew.EXEC:refs 5460 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 2123 # Number of stores executed +system.cpu.iew.EXEC:rate 0.247123 # Inst execution rate +system.cpu.iew.EXEC:refs 5464 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 2131 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed system.cpu.iew.WB:consumers 6466 # num instructions consuming a value -system.cpu.iew.WB:count 11620 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.798639 # average fanout of values written-back +system.cpu.iew.WB:count 11625 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.798948 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 5164 # num instructions producing a value -system.cpu.iew.WB:rate 0.216094 # insts written-back per cycle -system.cpu.iew.WB:sent 11692 # cumulative count of insts sent to commit +system.cpu.iew.WB:producers 5166 # num instructions producing a value +system.cpu.iew.WB:rate 0.212360 # insts written-back per cycle +system.cpu.iew.WB:sent 11698 # cumulative count of insts sent to commit system.cpu.iew.branchMispredicts 401 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 7230 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 3775 # Number of dispatched load instructions +system.cpu.iew.iewDispLoadInsts 3770 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 24 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 2557 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 3734 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 19465 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 3337 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 308 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 13532 # Number of executed instructions +system.cpu.iew.iewDispSquashedInsts 2547 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 3723 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 19439 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 3333 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 305 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 13528 # Number of executed instructions system.cpu.iew.iewIQFullEvents 10 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 1 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 2529 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 2527 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 39 # Number of cycles IEW is unblocking -system.cpu.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 1656 # Number of times an access to memory failed due to the cache being blocked system.cpu.iew.lsq.thread.0.forwLoads 81 # Number of loads that had data forwarded from stores system.cpu.iew.lsq.thread.0.ignoredResponses 3 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 40 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 61 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 2796 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 2922 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 40 # Number of memory order violations +system.cpu.iew.lsq.thread.0.squashedLoads 2791 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 2911 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 61 # Number of memory order violations system.cpu.iew.predictedNotTakenIncorrect 279 # Number of branches that were predicted not taken incorrectly system.cpu.iew.predictedTakenIncorrect 122 # Number of branches that were predicted taken incorrectly system.cpu.ipc 0.004016 # IPC: Instructions Per Cycle system.cpu.ipc_total 0.004016 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 13840 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0 13833 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist (null) 2 0.01% # Type of FU issued - IntAlu 8249 59.60% # Type of FU issued + IntAlu 8240 59.57% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.01% # Type of FU issued @@ -279,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 3432 24.80% # Type of FU issued - MemWrite 2154 15.56% # Type of FU issued + MemRead 3428 24.78% # Type of FU issued + MemWrite 2160 15.61% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 86 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.006214 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 87 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.006289 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist (null) 0 0.00% # attempts to use FU when none available - IntAlu 1 1.16% # attempts to use FU when none available + IntAlu 1 1.15% # attempts to use FU when none available IntMult 0 0.00% # attempts to use FU when none available IntDiv 0 0.00% # attempts to use FU when none available FloatAdd 0 0.00% # attempts to use FU when none available @@ -297,38 +297,38 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 53 61.63% # attempts to use FU when none available - MemWrite 32 37.21% # attempts to use FU when none available + MemRead 54 62.07% # attempts to use FU when none available + MemWrite 32 36.78% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 53773 +system.cpu.iq.ISSUE:issued_per_cycle.samples 54742 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 46903 8722.41% - 1 3262 606.62% - 2 1316 244.73% - 3 1665 309.63% - 4 333 61.93% - 5 188 34.96% - 6 73 13.58% - 7 23 4.28% - 8 10 1.86% + 0 47874 8745.39% + 1 3270 597.35% + 2 1302 237.84% + 3 1673 305.62% + 4 327 59.73% + 5 188 34.34% + 6 75 13.70% + 7 22 4.02% + 8 11 2.01% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.257378 # Inst issue rate -system.cpu.iq.iqInstsAdded 19393 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 13840 # Number of instructions issued +system.cpu.iq.ISSUE:rate 0.252694 # Inst issue rate +system.cpu.iq.iqInstsAdded 19367 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 13833 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 24 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 13381 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedInstsIssued 72 # Number of squashed instructions issued +system.cpu.iq.iqSquashedInstsExamined 13339 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 73 # Number of squashed instructions issued system.cpu.iq.iqSquashedNonSpecRemoved 7 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 9575 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.iq.iqSquashedOperandsExamined 9527 # Number of squashed operands that are examined and possibly removed from graph system.cpu.l2cache.ReadReq_accesses 480 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4520.691667 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 4520.693750 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2303.372917 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 2169932 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 2169933 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 480 # number of ReadReq misses system.cpu.l2cache.ReadReq_mshr_miss_latency 1105619 # number of ReadReq MSHR miss cycles @@ -343,10 +343,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 # system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 480 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4520.691667 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency 4520.693750 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 2303.372917 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 2169932 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 2169933 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 480 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits @@ -357,11 +357,11 @@ system.cpu.l2cache.fast_writes 0 # nu system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 480 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4520.691667 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency 4520.693750 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 2303.372917 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 2169932 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 2169933 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 480 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits @@ -382,27 +382,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 480 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 248.469634 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 248.469469 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 53773 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 7860 # Number of cycles rename is blocking +system.cpu.numCycles 54742 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 7851 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 4051 # Number of HB maps that are committed system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full -system.cpu.rename.RENAME:IdleCycles 28280 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 453 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:IdleCycles 29263 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 458 # Number of times rename has blocked due to LSQ full system.cpu.rename.RENAME:ROBFullEvents 8 # Number of times rename has blocked due to ROB full -system.cpu.rename.RENAME:RenameLookups 36016 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 29203 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 20142 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 5460 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 2529 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 483 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 16091 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 9161 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:RenameLookups 35953 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 29156 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 20115 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 5451 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 2527 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 486 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 16064 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 9164 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 27 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 828 # count of insts added to the skid buffer +system.cpu.rename.RENAME:skidInsts 831 # count of insts added to the skid buffer system.cpu.rename.RENAME:tempSerializingInsts 21 # count of temporary serializing insts renamed system.cpu.timesIdled 369 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 17 # Number of system calls diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr index eb1796ead..684350ff9 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr @@ -1,2 +1,3 @@ -0: system.remote_gdb.listener: listening for remote gdb on port 7000 +0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 warn: Entering event queue @ 0. Starting simulation... +warn: Increasing stack size by one page. diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout index 511bc594d..cbdc4ee25 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout @@ -6,8 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jan 22 2007 23:06:52 -M5 started Mon Jan 22 23:06:54 2007 -M5 executing on ewok -command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing -Exiting @ tick 1400135 because target called exit() +M5 compiled Mar 24 2007 13:51:02 +M5 started Sat Mar 24 13:51:12 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1400134 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini index db88e7673..ea499f4f1 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini @@ -1,48 +1,7 @@ [root] type=Root children=system -checkpoint= -clock=1000000000000 -max_tick=0 -output_file=cout -progress_interval=0 - -[exetrace] -intel_format=false -legion_lockstep=false -pc_symbol=true -print_cpseq=false -print_cycle=true -print_data=true -print_effaddr=true -print_fetchseq=false -print_iregs=false -print_opclass=true -print_thread=true -speculative=true -trace_system=client - -[serialize] -count=10 -cycle=0 -dir=cpt.%012d -period=0 - -[stats] -descriptions=true -dump_cycle=0 -dump_period=0 -dump_reset=false -ignore_events= -mysql_db= -mysql_host= -mysql_password= -mysql_user= -project_name=test -simulation_name=test -simulation_sample=0 -text_compat=true -text_file=m5stats.txt +dummy=0 [system] type=System @@ -70,6 +29,7 @@ commitToFetchDelay=1 commitToIEWDelay=1 commitToRenameDelay=1 commitWidth=8 +cpu_id=0 decodeToFetchDelay=1 decodeToRenameDelay=1 decodeWidth=8 @@ -417,12 +377,3 @@ range=0:134217727 zero=false port=system.membus.port[0] -[trace] -bufsize=0 -cycle=0 -dump_on_exit=false -file=cout -flags= -ignore= -start=0 - diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out index 9ee1931ca..6672039dd 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out @@ -1,9 +1,6 @@ [root] type=Root -clock=1000000000000 -max_tick=0 -progress_interval=0 -output_file=cout +dummy=0 [system.physmem] type=PhysicalMemory @@ -173,6 +170,7 @@ type=DerivO3CPU clock=1 phase=0 numThreads=1 +cpu_id=0 activity=0 workload=system.cpu.workload checker=null @@ -367,51 +365,3 @@ clock=1000 width=64 responder_set=false -[trace] -flags= -start=0 -cycle=0 -bufsize=0 -file=cout -dump_on_exit=false -ignore= - -[stats] -descriptions=true -project_name=test -simulation_name=test -simulation_sample=0 -text_file=m5stats.txt -text_compat=true -mysql_db= -mysql_user= -mysql_password= -mysql_host= -events_start=-1 -dump_reset=false -dump_cycle=0 -dump_period=0 -ignore_events= - -[random] -seed=1 - -[exetrace] -speculative=true -print_cycle=true -print_opclass=true -print_thread=true -print_effaddr=true -print_data=true -print_iregs=false -print_fetchseq=false -print_cpseq=false -print_reg_delta=false -pc_symbol=true -intel_format=false -legion_lockstep=false -trace_system=client - -[statsreset] -reset_cycle=0 - diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt index 3aae57d12..f855ff850 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt @@ -8,10 +8,10 @@ global.BPredUnit.condIncorrect 218 # Nu global.BPredUnit.condPredicted 459 # Number of conditional branches predicted global.BPredUnit.lookups 898 # Number of BP lookups global.BPredUnit.usedRAS 171 # Number of times the RAS was used to get a target. -host_inst_rate 22132 # Simulator instruction rate (inst/s) -host_mem_usage 176684 # Number of bytes of host memory used -host_seconds 0.11 # Real time elapsed on the host -host_tick_rate 6945216 # Simulator tick rate (ticks/s) +host_inst_rate 12517 # Simulator instruction rate (inst/s) +host_mem_usage 155528 # Number of bytes of host memory used +host_seconds 0.19 # Real time elapsed on the host +host_tick_rate 3937113 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 10 # Number of conflicting loads. memdepunit.memDep.conflictingStores 8 # Number of conflicting stores. memdepunit.memDep.insertedLoads 783 # Number of loads inserted to the mem dependence unit. @@ -26,14 +26,14 @@ system.cpu.commit.COM:bw_limited 0 # nu system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle system.cpu.commit.COM:committed_per_cycle.samples 28200 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 27270 9670.21% - 1 239 84.75% - 2 332 117.73% + 0 27273 9671.28% + 1 240 85.11% + 2 328 116.31% 3 127 45.04% - 4 83 29.43% + 4 80 28.37% 5 54 19.15% - 6 26 9.22% - 7 18 6.38% + 6 28 9.93% + 7 19 6.74% 8 51 18.09% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -52,14 +52,14 @@ system.cpu.committedInsts_total 2387 # Nu system.cpu.cpi 315.051529 # CPI: Cycles Per Instruction system.cpu.cpi_total 315.051529 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 560 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 7231.967391 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 7288.377049 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 7232.163043 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 7288.491803 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 468 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 665341 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 665359 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.164286 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 92 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 31 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 444591 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 444598 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.108929 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 61 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses) @@ -74,37 +74,37 @@ system.cpu.dcache.WriteReq_mshr_miss_latency 157720 system.cpu.dcache.WriteReq_mshr_miss_rate 0.081633 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 24 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked -system.cpu.dcache.avg_blocked_cycles_no_targets 2980 # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets 2980.375000 # average number of cycles each access was blocked system.cpu.dcache.avg_refs 8.141176 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 8 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked -system.cpu.dcache.blocked_cycles_no_targets 23840 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 23843 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 854 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 6979.500000 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 7086.011765 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 6979.611111 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 7086.094118 # average overall mshr miss latency system.cpu.dcache.demand_hits 692 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 1130679 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 1130697 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.189696 # miss rate for demand accesses system.cpu.dcache.demand_misses 162 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 77 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 602311 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 602318 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.099532 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 85 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 854 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 6979.500000 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 7086.011765 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 6979.611111 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 7086.094118 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 692 # number of overall hits -system.cpu.dcache.overall_miss_latency 1130679 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 1130697 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.189696 # miss rate for overall accesses system.cpu.dcache.overall_misses 162 # number of overall misses system.cpu.dcache.overall_mshr_hits 77 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 602311 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 602318 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.099532 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 85 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -125,18 +125,18 @@ system.cpu.dcache.tagsinuse 46.684988 # Cy system.cpu.dcache.total_refs 692 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 21865 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BlockedCycles 21870 # Number of cycles decode is blocked system.cpu.decode.DECODE:BranchMispred 79 # Number of times decode detected a branch misprediction system.cpu.decode.DECODE:BranchResolved 150 # Number of times decode resolved a branch system.cpu.decode.DECODE:DecodedInsts 4900 # Number of instructions handled by decode system.cpu.decode.DECODE:IdleCycles 5406 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 928 # Number of cycles decode is running +system.cpu.decode.DECODE:RunCycles 923 # Number of cycles decode is running system.cpu.decode.DECODE:SquashCycles 336 # Number of cycles decode is squashing system.cpu.decode.DECODE:SquashedInsts 286 # Number of squashed instructions handled by decode system.cpu.decode.DECODE:UnblockCycles 2 # Number of cycles decode is unblocking system.cpu.fetch.Branches 898 # Number of branches that fetch encountered system.cpu.fetch.CacheLines 813 # Number of cache lines fetched -system.cpu.fetch.Cycles 1774 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.Cycles 1769 # Number of cycles fetch has run and was not squashing or blocked system.cpu.fetch.IcacheSquashes 146 # Number of outstanding Icache misses that were squashed system.cpu.fetch.Insts 5593 # Number of instructions fetch has processed system.cpu.fetch.SquashCycles 258 # Number of cycles fetch has spent squashing @@ -147,27 +147,27 @@ system.cpu.fetch.rate 0.195991 # Nu system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) system.cpu.fetch.rateDist.samples 28537 system.cpu.fetch.rateDist.min_value 0 - 0 27576 9663.24% + 0 27581 9665.00% 1 50 17.52% - 2 92 32.24% - 3 74 25.93% - 4 117 41.00% - 5 71 24.88% - 6 43 15.07% + 2 84 29.44% + 3 78 27.33% + 4 118 41.35% + 5 67 23.48% + 6 41 14.37% 7 56 19.62% - 8 458 160.49% + 8 462 161.90% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist system.cpu.icache.ReadReq_accesses 813 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 4955.450199 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4151.809783 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_miss_latency 4955.454183 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4151.815217 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 562 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1243818 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 1243819 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.308733 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 251 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 67 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 763933 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 763934 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.226322 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 184 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked @@ -179,29 +179,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n system.cpu.icache.blocked_cycles_no_targets 13780 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 813 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 4955.450199 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4151.809783 # average overall mshr miss latency +system.cpu.icache.demand_avg_miss_latency 4955.454183 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4151.815217 # average overall mshr miss latency system.cpu.icache.demand_hits 562 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1243818 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 1243819 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.308733 # miss rate for demand accesses system.cpu.icache.demand_misses 251 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 67 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 763933 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 763934 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.226322 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 184 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.icache.overall_accesses 813 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 4955.450199 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4151.809783 # average overall mshr miss latency +system.cpu.icache.overall_avg_miss_latency 4955.454183 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4151.815217 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.icache.overall_hits 562 # number of overall hits -system.cpu.icache.overall_miss_latency 1243818 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 1243819 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.308733 # miss rate for overall accesses system.cpu.icache.overall_misses 251 # number of overall misses system.cpu.icache.overall_mshr_hits 67 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 763933 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 763934 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.226322 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 184 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -231,14 +231,14 @@ system.cpu.iew.EXEC:stores 341 # Nu system.cpu.iew.EXEC:swp 0 # number of swp insts executed system.cpu.iew.WB:consumers 1860 # num instructions consuming a value system.cpu.iew.WB:count 3219 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.785484 # average fanout of values written-back +system.cpu.iew.WB:fanout 0.786022 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 1461 # num instructions producing a value +system.cpu.iew.WB:producers 1462 # num instructions producing a value system.cpu.iew.WB:rate 0.112801 # insts written-back per cycle system.cpu.iew.WB:sent 3234 # cumulative count of insts sent to commit system.cpu.iew.branchMispredicts 152 # Number of branch mispredicts detected at execute -system.cpu.iew.iewBlockCycles 14742 # Number of cycles IEW is blocking +system.cpu.iew.iewBlockCycles 14743 # Number of cycles IEW is blocking system.cpu.iew.iewDispLoadInsts 783 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 6 # Number of dispatched non-speculative instructions system.cpu.iew.iewDispSquashedInsts 79 # Number of squashed instructions skipped by dispatch @@ -258,11 +258,11 @@ system.cpu.iew.lsq.thread.0.forwLoads 29 # Nu system.cpu.iew.lsq.thread.0.ignoredResponses 0 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 12 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 15 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled system.cpu.iew.lsq.thread.0.squashedLoads 368 # Number of loads squashed system.cpu.iew.lsq.thread.0.squashedStores 87 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 12 # Number of memory order violations +system.cpu.iew.memOrderViolationEvents 15 # Number of memory order violations system.cpu.iew.predictedNotTakenIncorrect 95 # Number of branches that were predicted not taken incorrectly system.cpu.iew.predictedTakenIncorrect 57 # Number of branches that were predicted taken incorrectly system.cpu.ipc 0.003174 # IPC: Instructions Per Cycle @@ -305,12 +305,12 @@ system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle system.cpu.iq.ISSUE:issued_per_cycle.samples 28537 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 27012 9465.61% - 1 616 215.86% - 2 356 124.75% + 0 27014 9466.31% + 1 617 216.21% + 2 351 123.00% 3 247 86.55% - 4 177 62.02% - 5 81 28.38% + 4 178 62.38% + 5 82 28.73% 6 32 11.21% 7 11 3.85% 8 5 1.75% @@ -326,12 +326,12 @@ system.cpu.iq.iqSquashedInstsIssued 25 # Nu system.cpu.iq.iqSquashedNonSpecRemoved 2 # Number of squashed non-spec instructions that were removed system.cpu.iq.iqSquashedOperandsExamined 801 # Number of squashed operands that are examined and possibly removed from graph system.cpu.l2cache.ReadReq_accesses 269 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4621.724907 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2296.401487 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 1243244 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_avg_miss_latency 4621.754647 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2296.408922 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 1243252 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 269 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 617732 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 617734 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 269 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked @@ -343,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 # system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 269 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4621.724907 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 2296.401487 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 4621.754647 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2296.408922 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 1243244 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 1243252 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 269 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 617732 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 617734 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 269 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 269 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4621.724907 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 2296.401487 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 4621.754647 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2296.408922 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 1243244 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 1243252 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 269 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 617732 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 617734 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 269 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -387,7 +387,7 @@ system.cpu.l2cache.total_refs 0 # To system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.numCycles 28537 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 14783 # Number of cycles rename is blocking +system.cpu.rename.RENAME:BlockCycles 14784 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 1768 # Number of HB maps that are committed system.cpu.rename.RENAME:IQFullEvents 18 # Number of times rename has blocked due to IQ full system.cpu.rename.RENAME:IdleCycles 5489 # Number of cycles rename is idle @@ -396,11 +396,11 @@ system.cpu.rename.RENAME:ROBFullEvents 2 # Nu system.cpu.rename.RENAME:RenameLookups 5285 # Number of register rename lookups that rename has made system.cpu.rename.RENAME:RenamedInsts 4708 # Number of instructions processed by rename system.cpu.rename.RENAME:RenamedOperands 3399 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 852 # Number of cycles rename is running +system.cpu.rename.RENAME:RunCycles 847 # Number of cycles rename is running system.cpu.rename.RENAME:SquashCycles 336 # Number of cycles rename is squashing system.cpu.rename.RENAME:UnblockCycles 25 # Number of cycles rename is unblocking system.cpu.rename.RENAME:UndoneMaps 1631 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 7052 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializeStallCycles 7056 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 8 # count of serializing insts renamed system.cpu.rename.RENAME:skidInsts 88 # count of insts added to the skid buffer system.cpu.rename.RENAME:tempSerializingInsts 6 # count of temporary serializing insts renamed diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr index fb2137f1e..313de3c46 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr @@ -1,3 +1,4 @@ -0: system.remote_gdb.listener: listening for remote gdb on port 7000 +0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 warn: Entering event queue @ 0. Starting simulation... +warn: Increasing stack size by one page. warn: ignoring syscall sigprocmask(1, 18446744073709547831, ...) diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout index 6436baf8f..233834343 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout @@ -6,8 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jan 22 2007 23:06:52 -M5 started Mon Jan 22 23:07:09 2007 -M5 executing on ewok -command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing +M5 compiled Mar 24 2007 13:51:02 +M5 started Sat Mar 24 13:51:14 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing +Global frequency set at 1000000000000 ticks per second Exiting @ tick 752028 because target called exit() diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini index 6eef745b4..e75a10c54 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini @@ -1,48 +1,7 @@ [root] type=Root children=system -checkpoint= -clock=1000000000000 -max_tick=0 -output_file=cout -progress_interval=0 - -[exetrace] -intel_format=false -legion_lockstep=false -pc_symbol=true -print_cpseq=false -print_cycle=true -print_data=true -print_effaddr=true -print_fetchseq=false -print_iregs=false -print_opclass=true -print_thread=true -speculative=true -trace_system=client - -[serialize] -count=10 -cycle=0 -dir=cpt.%012d -period=0 - -[stats] -descriptions=true -dump_cycle=0 -dump_period=0 -dump_reset=false -ignore_events= -mysql_db= -mysql_host= -mysql_password= -mysql_user= -project_name=test -simulation_name=test -simulation_sample=0 -text_compat=true -text_file=m5stats.txt +dummy=0 [system] type=System @@ -70,6 +29,7 @@ commitToFetchDelay=1 commitToIEWDelay=1 commitToRenameDelay=1 commitWidth=8 +cpu_id=0 decodeToFetchDelay=1 decodeToRenameDelay=1 decodeWidth=8 @@ -433,12 +393,3 @@ range=0:134217727 zero=false port=system.membus.port[0] -[trace] -bufsize=0 -cycle=0 -dump_on_exit=false -file=cout -flags= -ignore= -start=0 - diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out index f36f666af..9489e27c0 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out @@ -1,9 +1,6 @@ [root] type=Root -clock=1000000000000 -max_tick=0 -progress_interval=0 -output_file=cout +dummy=0 [system.physmem] type=PhysicalMemory @@ -189,6 +186,7 @@ type=DerivO3CPU clock=1 phase=0 numThreads=1 +cpu_id=0 activity=0 workload=system.cpu.workload0 system.cpu.workload1 checker=null @@ -383,51 +381,3 @@ clock=1000 width=64 responder_set=false -[trace] -flags= -start=0 -cycle=0 -bufsize=0 -file=cout -dump_on_exit=false -ignore= - -[stats] -descriptions=true -project_name=test -simulation_name=test -simulation_sample=0 -text_file=m5stats.txt -text_compat=true -mysql_db= -mysql_user= -mysql_password= -mysql_host= -events_start=-1 -dump_reset=false -dump_cycle=0 -dump_period=0 -ignore_events= - -[random] -seed=1 - -[exetrace] -speculative=true -print_cycle=true -print_opclass=true -print_thread=true -print_effaddr=true -print_data=true -print_iregs=false -print_fetchseq=false -print_cpseq=false -print_reg_delta=false -pc_symbol=true -intel_format=false -legion_lockstep=false -trace_system=client - -[statsreset] -reset_cycle=0 - diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt index bb9e9360c..74e8f8d83 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt @@ -1,48 +1,48 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 1334 # Number of BTB hits -global.BPredUnit.BTBLookups 6012 # Number of BTB lookups +global.BPredUnit.BTBHits 1320 # Number of BTB hits +global.BPredUnit.BTBLookups 6181 # Number of BTB lookups global.BPredUnit.RASInCorrect 173 # Number of incorrect RAS predictions. -global.BPredUnit.condIncorrect 1201 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 4031 # Number of conditional branches predicted -global.BPredUnit.lookups 12370 # Number of BP lookups -global.BPredUnit.usedRAS 6337 # Number of times the RAS was used to get a target. -host_inst_rate 11366 # Simulator instruction rate (inst/s) -host_mem_usage 178064 # Number of bytes of host memory used -host_seconds 0.99 # Real time elapsed on the host -host_tick_rate 2259917 # Simulator tick rate (ticks/s) -memdepunit.memDep.conflictingLoads 27 # Number of conflicting loads. -memdepunit.memDep.conflictingLoads 20 # Number of conflicting loads. -memdepunit.memDep.conflictingStores 97 # Number of conflicting stores. -memdepunit.memDep.conflictingStores 3 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 5749 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedLoads 2822 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 4490 # Number of stores inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 1747 # Number of stores inserted to the mem dependence unit. +global.BPredUnit.condIncorrect 1181 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 4228 # Number of conditional branches predicted +global.BPredUnit.lookups 12535 # Number of BP lookups +global.BPredUnit.usedRAS 6333 # Number of times the RAS was used to get a target. +host_inst_rate 6990 # Simulator instruction rate (inst/s) +host_mem_usage 156628 # Number of bytes of host memory used +host_seconds 1.61 # Real time elapsed on the host +host_tick_rate 1386962 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 26 # Number of conflicting loads. +memdepunit.memDep.conflictingLoads 23 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 4 # Number of conflicting stores. +memdepunit.memDep.conflictingStores 1 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 3657 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 5285 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1780 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 4439 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 11247 # Number of instructions simulated sim_seconds 0.000002 # Number of seconds simulated -sim_ticks 2237162 # Number of ticks simulated +sim_ticks 2232164 # Number of ticks simulated system.cpu.commit.COM:branches 1724 # Number of branches committed system.cpu.commit.COM:branches_0 862 # Number of branches committed system.cpu.commit.COM:branches_1 862 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 128 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 123 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:bw_limited_0 0 # number of insts not committed due to BW limits system.cpu.commit.COM:bw_limited_1 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 188940 +system.cpu.commit.COM:committed_per_cycle.samples 189138 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 183303 9701.65% - 1 3121 165.18% - 2 1239 65.58% - 3 531 28.10% - 4 275 14.55% - 5 154 8.15% - 6 128 6.77% + 0 183476 9700.64% + 1 3161 167.13% + 2 1212 64.08% + 3 544 28.76% + 4 279 14.75% + 5 155 8.20% + 6 127 6.71% 7 61 3.23% - 8 128 6.77% + 8 123 6.50% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -61,97 +61,97 @@ system.cpu.commit.COM:refs_1 1791 # Nu system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed system.cpu.commit.COM:swp_count_0 0 # Number of s/w prefetches committed system.cpu.commit.COM:swp_count_1 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 943 # The number of times a branch was mispredicted +system.cpu.commit.branchMispredicts 938 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 11281 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 34 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 28509 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 29588 # The number of squashed insts skipped by commit system.cpu.committedInsts_0 5624 # Number of Instructions Simulated system.cpu.committedInsts_1 5623 # Number of Instructions Simulated system.cpu.committedInsts_total 11247 # Number of Instructions Simulated -system.cpu.cpi_0 397.788407 # CPI: Cycles Per Instruction -system.cpu.cpi_1 397.859150 # CPI: Cycles Per Instruction -system.cpu.cpi_total 198.911888 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 3186 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_accesses_0 3186 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 9969.378125 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_miss_latency_0 9969.378125 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10500.608040 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10500.608040 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 2866 # number of ReadReq hits -system.cpu.dcache.ReadReq_hits_0 2866 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 3190201 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_latency_0 3190201 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.100439 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_miss_rate_0 0.100439 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 320 # number of ReadReq misses -system.cpu.dcache.ReadReq_misses_0 320 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 121 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_hits_0 121 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 2089621 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_latency_0 2089621 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.062461 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.062461 # mshr miss rate for ReadReq accesses +system.cpu.cpi_0 396.899716 # CPI: Cycles Per Instruction +system.cpu.cpi_1 396.970301 # CPI: Cycles Per Instruction +system.cpu.cpi_total 198.467502 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 3176 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_accesses_0 3176 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 9976.257143 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_miss_latency_0 9976.257143 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10425.356784 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10425.356784 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2861 # number of ReadReq hits +system.cpu.dcache.ReadReq_hits_0 2861 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 3142521 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency_0 3142521 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.099181 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_miss_rate_0 0.099181 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 315 # number of ReadReq misses +system.cpu.dcache.ReadReq_misses_0 315 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 116 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_hits_0 116 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 2074646 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency_0 2074646 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.062657 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.062657 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 199 # number of ReadReq MSHR misses system.cpu.dcache.ReadReq_mshr_misses_0 199 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 1624 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_accesses_0 1624 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 6540.875740 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_miss_latency_0 6540.875740 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7803.746575 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 7803.746575 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 6512.846154 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_miss_latency_0 6512.846154 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7776.006849 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 7776.006849 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 1117 # number of WriteReq hits system.cpu.dcache.WriteReq_hits_0 1117 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 3316224 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_latency_0 3316224 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 3302013 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency_0 3302013 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.312192 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_miss_rate_0 0.312192 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 507 # number of WriteReq misses system.cpu.dcache.WriteReq_misses_0 507 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 361 # number of WriteReq MSHR hits system.cpu.dcache.WriteReq_mshr_hits_0 361 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 1139347 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_latency_0 1139347 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 1135297 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency_0 1135297 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_miss_rate_0 0.089901 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 146 # number of WriteReq MSHR misses system.cpu.dcache.WriteReq_mshr_misses_0 146 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs 3973 # average number of cycles each access was blocked -system.cpu.dcache.avg_blocked_cycles_no_targets 3625.380952 # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 11.544928 # Average number of references to valid blocks. +system.cpu.dcache.avg_blocked_cycles_no_targets 3613.488095 # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 11.563953 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 1 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 84 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 3973 # number of cycles access was blocked -system.cpu.dcache.blocked_cycles_no_targets 304532 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 303533 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 4810 # number of demand (read+write) accesses -system.cpu.dcache.demand_accesses_0 4810 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses 4800 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses_0 4800 # number of demand (read+write) accesses system.cpu.dcache.demand_accesses_1 0 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 7867.503023 # average overall miss latency -system.cpu.dcache.demand_avg_miss_latency_0 7867.503023 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency 7840.065693 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency_0 7840.065693 # average overall miss latency system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 9359.327536 # average overall mshr miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency_0 9359.327536 # average overall mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 9304.182609 # average overall mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency_0 9304.182609 # average overall mshr miss latency system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency -system.cpu.dcache.demand_hits 3983 # number of demand (read+write) hits -system.cpu.dcache.demand_hits_0 3983 # number of demand (read+write) hits +system.cpu.dcache.demand_hits 3978 # number of demand (read+write) hits +system.cpu.dcache.demand_hits_0 3978 # number of demand (read+write) hits system.cpu.dcache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 6506425 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_latency_0 6506425 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 6444534 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency_0 6444534 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.171933 # miss rate for demand accesses -system.cpu.dcache.demand_miss_rate_0 0.171933 # miss rate for demand accesses +system.cpu.dcache.demand_miss_rate 0.171250 # miss rate for demand accesses +system.cpu.dcache.demand_miss_rate_0 0.171250 # miss rate for demand accesses system.cpu.dcache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses -system.cpu.dcache.demand_misses 827 # number of demand (read+write) misses -system.cpu.dcache.demand_misses_0 827 # number of demand (read+write) misses +system.cpu.dcache.demand_misses 822 # number of demand (read+write) misses +system.cpu.dcache.demand_misses_0 822 # number of demand (read+write) misses system.cpu.dcache.demand_misses_1 0 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 482 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_hits_0 482 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits 477 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits_0 477 # number of demand (read+write) MSHR hits system.cpu.dcache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 3228968 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_latency_0 3228968 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 3209943 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency_0 3209943 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.071726 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_miss_rate_0 0.071726 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate 0.071875 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate_0 0.071875 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 345 # number of demand (read+write) MSHR misses system.cpu.dcache.demand_mshr_misses_0 345 # number of demand (read+write) MSHR misses @@ -161,38 +161,38 @@ system.cpu.dcache.mshr_cap_events 0 # nu system.cpu.dcache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.dcache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 4810 # number of overall (read+write) accesses -system.cpu.dcache.overall_accesses_0 4810 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses 4800 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses_0 4800 # number of overall (read+write) accesses system.cpu.dcache.overall_accesses_1 0 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 7867.503023 # average overall miss latency -system.cpu.dcache.overall_avg_miss_latency_0 7867.503023 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency 7840.065693 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency_0 7840.065693 # average overall miss latency system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 9359.327536 # average overall mshr miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency_0 9359.327536 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 9304.182609 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency_0 9304.182609 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 3983 # number of overall hits -system.cpu.dcache.overall_hits_0 3983 # number of overall hits +system.cpu.dcache.overall_hits 3978 # number of overall hits +system.cpu.dcache.overall_hits_0 3978 # number of overall hits system.cpu.dcache.overall_hits_1 0 # number of overall hits -system.cpu.dcache.overall_miss_latency 6506425 # number of overall miss cycles -system.cpu.dcache.overall_miss_latency_0 6506425 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 6444534 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency_0 6444534 # number of overall miss cycles system.cpu.dcache.overall_miss_latency_1 0 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.171933 # miss rate for overall accesses -system.cpu.dcache.overall_miss_rate_0 0.171933 # miss rate for overall accesses +system.cpu.dcache.overall_miss_rate 0.171250 # miss rate for overall accesses +system.cpu.dcache.overall_miss_rate_0 0.171250 # miss rate for overall accesses system.cpu.dcache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses -system.cpu.dcache.overall_misses 827 # number of overall misses -system.cpu.dcache.overall_misses_0 827 # number of overall misses +system.cpu.dcache.overall_misses 822 # number of overall misses +system.cpu.dcache.overall_misses_0 822 # number of overall misses system.cpu.dcache.overall_misses_1 0 # number of overall misses -system.cpu.dcache.overall_mshr_hits 482 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_hits_0 482 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits 477 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits_0 477 # number of overall MSHR hits system.cpu.dcache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 3228968 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_latency_0 3228968 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 3209943 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency_0 3209943 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.071726 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_miss_rate_0 0.071726 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate 0.071875 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate_0 0.071875 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 345 # number of overall MSHR misses system.cpu.dcache.overall_mshr_misses_0 345 # number of overall MSHR misses @@ -215,153 +215,153 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.replacements_0 0 # number of replacements system.cpu.dcache.replacements_1 0 # number of replacements -system.cpu.dcache.sampled_refs 345 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 344 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 198.670475 # Cycle average of tags in use -system.cpu.dcache.total_refs 3983 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 198.340517 # Cycle average of tags in use +system.cpu.dcache.total_refs 3978 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.dcache.writebacks_0 0 # number of writebacks system.cpu.dcache.writebacks_1 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 97618 # Number of cycles decode is blocked -system.cpu.decode.DECODE:BranchMispred 267 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 390 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 67048 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 262280 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 12122 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 5552 # Number of cycles decode is squashing -system.cpu.decode.DECODE:SquashedInsts 680 # Number of squashed instructions handled by decode -system.cpu.decode.DECODE:UnblockCycles 155 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 12370 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 13012 # Number of cache lines fetched -system.cpu.fetch.Cycles 27804 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 800 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 79582 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 4833 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.065467 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 52787 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 7671 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 0.421180 # Number of inst fetches per cycle +system.cpu.decode.DECODE:BlockedCycles 95932 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 257 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 378 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 68233 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 264032 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 12255 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 5733 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 618 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:UnblockCycles 167 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 12535 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 13184 # Number of cache lines fetched +system.cpu.fetch.Cycles 28123 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 886 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 80687 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 4911 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.066271 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 53960 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 7653 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 0.426584 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 188950 +system.cpu.fetch.rateDist.samples 189147 system.cpu.fetch.rateDist.min_value 0 - 0 174142 9216.30% - 1 378 20.01% - 2 298 15.77% - 3 3656 193.49% - 4 2200 116.43% - 5 1017 53.82% - 6 974 51.55% - 7 2369 125.38% - 8 3916 207.25% + 0 174193 9209.40% + 1 369 19.51% + 2 281 14.86% + 3 3638 192.34% + 4 2283 120.70% + 5 1005 53.13% + 6 984 52.02% + 7 2371 125.35% + 8 4023 212.69% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 13010 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_accesses_0 13010 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 7746.912281 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_miss_latency_0 7746.912281 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 7155.055556 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 7155.055556 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 12098 # number of ReadReq hits -system.cpu.icache.ReadReq_hits_0 12098 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 7065184 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_latency_0 7065184 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.070100 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_miss_rate_0 0.070100 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 13182 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_accesses_0 13182 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 7732.322368 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_miss_latency_0 7732.322368 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 7128.205742 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 7128.205742 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 12270 # number of ReadReq hits +system.cpu.icache.ReadReq_hits_0 12270 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 7051878 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency_0 7051878 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.069185 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_miss_rate_0 0.069185 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 912 # number of ReadReq misses system.cpu.icache.ReadReq_misses_0 912 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 282 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_hits_0 282 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 4507685 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_latency_0 4507685 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.048424 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_miss_rate_0 0.048424 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 630 # number of ReadReq MSHR misses -system.cpu.icache.ReadReq_mshr_misses_0 630 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_mshr_hits 285 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_hits_0 285 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 4469385 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency_0 4469385 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.047565 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_miss_rate_0 0.047565 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 627 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_mshr_misses_0 627 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked -system.cpu.icache.avg_blocked_cycles_no_targets 5648.647059 # average number of cycles each access was blocked -system.cpu.icache.avg_refs 19.203175 # Average number of references to valid blocks. +system.cpu.icache.avg_blocked_cycles_no_targets 5603.944444 # average number of cycles each access was blocked +system.cpu.icache.avg_refs 19.569378 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked_no_targets 17 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 18 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked_cycles_no_targets 96027 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 100871 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 13010 # number of demand (read+write) accesses -system.cpu.icache.demand_accesses_0 13010 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses 13182 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses_0 13182 # number of demand (read+write) accesses system.cpu.icache.demand_accesses_1 0 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 7746.912281 # average overall miss latency -system.cpu.icache.demand_avg_miss_latency_0 7746.912281 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency 7732.322368 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency_0 7732.322368 # average overall miss latency system.cpu.icache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 7155.055556 # average overall mshr miss latency -system.cpu.icache.demand_avg_mshr_miss_latency_0 7155.055556 # average overall mshr miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 7128.205742 # average overall mshr miss latency +system.cpu.icache.demand_avg_mshr_miss_latency_0 7128.205742 # average overall mshr miss latency system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency -system.cpu.icache.demand_hits 12098 # number of demand (read+write) hits -system.cpu.icache.demand_hits_0 12098 # number of demand (read+write) hits +system.cpu.icache.demand_hits 12270 # number of demand (read+write) hits +system.cpu.icache.demand_hits_0 12270 # number of demand (read+write) hits system.cpu.icache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 7065184 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_latency_0 7065184 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 7051878 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency_0 7051878 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.070100 # miss rate for demand accesses -system.cpu.icache.demand_miss_rate_0 0.070100 # miss rate for demand accesses +system.cpu.icache.demand_miss_rate 0.069185 # miss rate for demand accesses +system.cpu.icache.demand_miss_rate_0 0.069185 # miss rate for demand accesses system.cpu.icache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses system.cpu.icache.demand_misses 912 # number of demand (read+write) misses system.cpu.icache.demand_misses_0 912 # number of demand (read+write) misses system.cpu.icache.demand_misses_1 0 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 282 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_hits_0 282 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits 285 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits_0 285 # number of demand (read+write) MSHR hits system.cpu.icache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 4507685 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_latency_0 4507685 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 4469385 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency_0 4469385 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.048424 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_miss_rate_0 0.048424 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_rate 0.047565 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_rate_0 0.047565 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 630 # number of demand (read+write) MSHR misses -system.cpu.icache.demand_mshr_misses_0 630 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses 627 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses_0 627 # number of demand (read+write) MSHR misses system.cpu.icache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.icache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 13010 # number of overall (read+write) accesses -system.cpu.icache.overall_accesses_0 13010 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses 13182 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses_0 13182 # number of overall (read+write) accesses system.cpu.icache.overall_accesses_1 0 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 7746.912281 # average overall miss latency -system.cpu.icache.overall_avg_miss_latency_0 7746.912281 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency 7732.322368 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency_0 7732.322368 # average overall miss latency system.cpu.icache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 7155.055556 # average overall mshr miss latency -system.cpu.icache.overall_avg_mshr_miss_latency_0 7155.055556 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 7128.205742 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_miss_latency_0 7128.205742 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 12098 # number of overall hits -system.cpu.icache.overall_hits_0 12098 # number of overall hits +system.cpu.icache.overall_hits 12270 # number of overall hits +system.cpu.icache.overall_hits_0 12270 # number of overall hits system.cpu.icache.overall_hits_1 0 # number of overall hits -system.cpu.icache.overall_miss_latency 7065184 # number of overall miss cycles -system.cpu.icache.overall_miss_latency_0 7065184 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 7051878 # number of overall miss cycles +system.cpu.icache.overall_miss_latency_0 7051878 # number of overall miss cycles system.cpu.icache.overall_miss_latency_1 0 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.070100 # miss rate for overall accesses -system.cpu.icache.overall_miss_rate_0 0.070100 # miss rate for overall accesses +system.cpu.icache.overall_miss_rate 0.069185 # miss rate for overall accesses +system.cpu.icache.overall_miss_rate_0 0.069185 # miss rate for overall accesses system.cpu.icache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses system.cpu.icache.overall_misses 912 # number of overall misses system.cpu.icache.overall_misses_0 912 # number of overall misses system.cpu.icache.overall_misses_1 0 # number of overall misses -system.cpu.icache.overall_mshr_hits 282 # number of overall MSHR hits -system.cpu.icache.overall_mshr_hits_0 282 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits 285 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits_0 285 # number of overall MSHR hits system.cpu.icache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 4507685 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_latency_0 4507685 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 4469385 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency_0 4469385 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.048424 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_miss_rate_0 0.048424 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate 0.047565 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate_0 0.047565 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 630 # number of overall MSHR misses -system.cpu.icache.overall_mshr_misses_0 630 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses 627 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses_0 627 # number of overall MSHR misses system.cpu.icache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -381,138 +381,138 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 6 # number of replacements system.cpu.icache.replacements_0 6 # number of replacements system.cpu.icache.replacements_1 0 # number of replacements -system.cpu.icache.sampled_refs 630 # Sample count of references to valid blocks. +system.cpu.icache.sampled_refs 627 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 289.377534 # Cycle average of tags in use -system.cpu.icache.total_refs 12098 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 288.361956 # Cycle average of tags in use +system.cpu.icache.total_refs 12270 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.icache.writebacks_0 0 # number of writebacks system.cpu.icache.writebacks_1 0 # number of writebacks -system.cpu.idleCycles 2048213 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 4035 # Number of branches executed -system.cpu.iew.EXEC:branches_0 2458 # Number of branches executed -system.cpu.iew.EXEC:branches_1 1577 # Number of branches executed +system.cpu.idleCycles 2043018 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 4024 # Number of branches executed +system.cpu.iew.EXEC:branches_0 1569 # Number of branches executed +system.cpu.iew.EXEC:branches_1 2455 # Number of branches executed system.cpu.iew.EXEC:nop 84 # number of nop insts executed system.cpu.iew.EXEC:nop_0 42 # number of nop insts executed system.cpu.iew.EXEC:nop_1 42 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.142196 # Inst execution rate -system.cpu.iew.EXEC:refs 10960 # number of memory reference insts executed -system.cpu.iew.EXEC:refs_0 7253 # number of memory reference insts executed -system.cpu.iew.EXEC:refs_1 3707 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 3812 # Number of stores executed -system.cpu.iew.EXEC:stores_0 2509 # Number of stores executed -system.cpu.iew.EXEC:stores_1 1303 # Number of stores executed +system.cpu.iew.EXEC:rate 0.144523 # Inst execution rate +system.cpu.iew.EXEC:refs 11361 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_0 4575 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_1 6786 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 3833 # Number of stores executed +system.cpu.iew.EXEC:stores_0 1337 # Number of stores executed +system.cpu.iew.EXEC:stores_1 2496 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed system.cpu.iew.EXEC:swp_0 0 # number of swp insts executed system.cpu.iew.EXEC:swp_1 0 # number of swp insts executed -system.cpu.iew.WB:consumers 12377 # num instructions consuming a value -system.cpu.iew.WB:consumers_0 6652 # num instructions consuming a value -system.cpu.iew.WB:consumers_1 5725 # num instructions consuming a value -system.cpu.iew.WB:count 22520 # cumulative count of insts written-back -system.cpu.iew.WB:count_0 12790 # cumulative count of insts written-back -system.cpu.iew.WB:count_1 9730 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.808516 # average fanout of values written-back -system.cpu.iew.WB:fanout_0 0.819753 # average fanout of values written-back -system.cpu.iew.WB:fanout_1 0.795459 # average fanout of values written-back +system.cpu.iew.WB:consumers 12385 # num instructions consuming a value +system.cpu.iew.WB:consumers_0 5750 # num instructions consuming a value +system.cpu.iew.WB:consumers_1 6635 # num instructions consuming a value +system.cpu.iew.WB:count 22604 # cumulative count of insts written-back +system.cpu.iew.WB:count_0 10240 # cumulative count of insts written-back +system.cpu.iew.WB:count_1 12364 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.811385 # average fanout of values written-back +system.cpu.iew.WB:fanout_0 0.800522 # average fanout of values written-back +system.cpu.iew.WB:fanout_1 0.820799 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_0 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_1 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:penalized_rate_0 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:penalized_rate_1 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 10007 # num instructions producing a value -system.cpu.iew.WB:producers_0 5453 # num instructions producing a value -system.cpu.iew.WB:producers_1 4554 # num instructions producing a value -system.cpu.iew.WB:rate 0.119185 # insts written-back per cycle -system.cpu.iew.WB:rate_0 0.067690 # insts written-back per cycle -system.cpu.iew.WB:rate_1 0.051495 # insts written-back per cycle -system.cpu.iew.WB:sent 22674 # cumulative count of insts sent to commit -system.cpu.iew.WB:sent_0 12874 # cumulative count of insts sent to commit -system.cpu.iew.WB:sent_1 9800 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 1030 # Number of branch mispredicts detected at execute -system.cpu.iew.iewBlockCycles 62040 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 8571 # Number of dispatched load instructions -system.cpu.iew.iewDispNonSpecInsts 42 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 5358 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 6237 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 39780 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 7148 # Number of load instructions executed -system.cpu.iew.iewExecLoadInsts_0 4744 # Number of load instructions executed -system.cpu.iew.iewExecLoadInsts_1 2404 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 903 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 26868 # Number of executed instructions -system.cpu.iew.iewIQFullEvents 44 # Number of times the IQ has become full, causing a stall +system.cpu.iew.WB:producers 10049 # num instructions producing a value +system.cpu.iew.WB:producers_0 4603 # num instructions producing a value +system.cpu.iew.WB:producers_1 5446 # num instructions producing a value +system.cpu.iew.WB:rate 0.119505 # insts written-back per cycle +system.cpu.iew.WB:rate_0 0.054138 # insts written-back per cycle +system.cpu.iew.WB:rate_1 0.065367 # insts written-back per cycle +system.cpu.iew.WB:sent 22763 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_0 10322 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_1 12441 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 1027 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 60103 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 8942 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 41 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 5344 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 6219 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 40858 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 7528 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_0 3238 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_1 4290 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 872 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 27336 # Number of executed instructions +system.cpu.iew.iewIQFullEvents 45 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle -system.cpu.iew.iewLSQFullEvents 2 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 5552 # Number of cycles IEW is squashing -system.cpu.iew.iewUnblockCycles 117 # Number of cycles IEW is unblocking -system.cpu.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding -system.cpu.iew.lsq.thread.0.cacheBlocked 3088 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.0.forwLoads 64 # Number of loads that had data forwarded from stores -system.cpu.iew.lsq.thread.0.ignoredResponses 6 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.iewLSQFullEvents 4 # Number of times the LSQ has become full, causing a stall +system.cpu.iew.iewSquashCycles 5733 # Number of cycles IEW is squashing +system.cpu.iew.iewUnblockCycles 122 # Number of cycles IEW is unblocking +system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.0.cacheBlocked 1584 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.0.forwLoads 65 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 10 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 34 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 56 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 4770 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 3678 # Number of stores squashed -system.cpu.iew.lsq.thread.1.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding -system.cpu.iew.lsq.thread.1.cacheBlocked 756 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.1.forwLoads 64 # Number of loads that had data forwarded from stores -system.cpu.iew.lsq.thread.1.ignoredResponses 10 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.squashedLoads 2678 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 968 # Number of stores squashed +system.cpu.iew.lsq.thread.1.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.1.cacheBlocked 2643 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.1.forwLoads 67 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.1.ignoredResponses 7 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.1.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.1.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.1.memOrderViolation 29 # Number of memory ordering violations +system.cpu.iew.lsq.thread.1.memOrderViolation 54 # Number of memory ordering violations system.cpu.iew.lsq.thread.1.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.1.squashedLoads 1843 # Number of loads squashed -system.cpu.iew.lsq.thread.1.squashedStores 935 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 63 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 798 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 232 # Number of branches that were predicted taken incorrectly -system.cpu.ipc_0 0.002514 # IPC: Instructions Per Cycle -system.cpu.ipc_1 0.002513 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.005027 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 16536 # Type of FU issued +system.cpu.iew.lsq.thread.1.squashedLoads 4306 # Number of loads squashed +system.cpu.iew.lsq.thread.1.squashedStores 3627 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 110 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 796 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 231 # Number of branches that were predicted taken incorrectly +system.cpu.ipc_0 0.002520 # IPC: Instructions Per Cycle +system.cpu.ipc_1 0.002519 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.005039 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 12578 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist - (null) 2 0.01% # Type of FU issued - IntAlu 9136 55.25% # Type of FU issued + (null) 2 0.02% # Type of FU issued + IntAlu 7865 62.53% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued - FloatAdd 2 0.01% # Type of FU issued + FloatAdd 2 0.02% # Type of FU issued FloatCmp 0 0.00% # Type of FU issued FloatCvt 0 0.00% # Type of FU issued FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 4850 29.33% # Type of FU issued - MemWrite 2545 15.39% # Type of FU issued + MemRead 3344 26.59% # Type of FU issued + MemWrite 1364 10.84% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:FU_type_1 11235 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_1 15630 # Type of FU issued system.cpu.iq.ISSUE:FU_type_1.start_dist - (null) 2 0.02% # Type of FU issued - IntAlu 7383 65.71% # Type of FU issued + (null) 2 0.01% # Type of FU issued + IntAlu 8707 55.71% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued - FloatAdd 2 0.02% # Type of FU issued + FloatAdd 2 0.01% # Type of FU issued FloatCmp 0 0.00% # Type of FU issued FloatCvt 0 0.00% # Type of FU issued FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 2518 22.41% # Type of FU issued - MemWrite 1329 11.83% # Type of FU issued + MemRead 4394 28.11% # Type of FU issued + MemWrite 2524 16.15% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_1.end_dist -system.cpu.iq.ISSUE:FU_type 27771 # Type of FU issued +system.cpu.iq.ISSUE:FU_type 28208 # Type of FU issued system.cpu.iq.ISSUE:FU_type.start_dist (null) 4 0.01% # Type of FU issued - IntAlu 16519 59.48% # Type of FU issued + IntAlu 16572 58.75% # Type of FU issued IntMult 2 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 4 0.01% # Type of FU issued @@ -521,20 +521,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 7368 26.53% # Type of FU issued - MemWrite 3874 13.95% # Type of FU issued + MemRead 7738 27.43% # Type of FU issued + MemWrite 3888 13.78% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 146 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_cnt_0 73 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_cnt_1 73 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.005257 # FU busy rate (busy events/executed inst) -system.cpu.iq.ISSUE:fu_busy_rate_0 0.002629 # FU busy rate (busy events/executed inst) -system.cpu.iq.ISSUE:fu_busy_rate_1 0.002629 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 149 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_0 72 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_1 77 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.005282 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_0 0.002552 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_1 0.002730 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist (null) 0 0.00% # attempts to use FU when none available - IntAlu 0 0.00% # attempts to use FU when none available + IntAlu 1 0.67% # attempts to use FU when none available IntMult 0 0.00% # attempts to use FU when none available IntDiv 0 0.00% # attempts to use FU when none available FloatAdd 0 0.00% # attempts to use FU when none available @@ -543,52 +543,52 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 83 56.85% # attempts to use FU when none available - MemWrite 63 43.15% # attempts to use FU when none available + MemRead 83 55.70% # attempts to use FU when none available + MemWrite 65 43.62% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 188950 +system.cpu.iq.ISSUE:issued_per_cycle.samples 189147 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 174613 9241.23% - 1 6958 368.25% - 2 3428 181.42% - 3 2696 142.68% - 4 636 33.66% - 5 439 23.23% - 6 143 7.57% - 7 24 1.27% - 8 13 0.69% + 0 174626 9232.29% + 1 7072 373.89% + 2 3403 179.91% + 3 2709 143.22% + 4 713 37.70% + 5 443 23.42% + 6 143 7.56% + 7 26 1.37% + 8 12 0.63% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.146975 # Inst issue rate -system.cpu.iq.iqInstsAdded 39654 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 27771 # Number of instructions issued -system.cpu.iq.iqNonSpecInstsAdded 42 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 27426 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedInstsIssued 185 # Number of squashed instructions issued -system.cpu.iq.iqSquashedNonSpecRemoved 8 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 20011 # Number of squashed operands that are examined and possibly removed from graph -system.cpu.l2cache.ReadReq_accesses 973 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_accesses_0 973 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 6750.932169 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_miss_latency_0 6750.932169 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 3603.773895 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 3603.773895 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 6568657 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_latency_0 6568657 # number of ReadReq miss cycles +system.cpu.iq.ISSUE:rate 0.149133 # Inst issue rate +system.cpu.iq.iqInstsAdded 40733 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 28208 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 41 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 28495 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 192 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 7 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 21369 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadReq_accesses 970 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_accesses_0 970 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 6748.795876 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency_0 6748.795876 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 3604.818557 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 3604.818557 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 6546332 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency_0 6546332 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_miss_rate_0 1 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 973 # number of ReadReq misses -system.cpu.l2cache.ReadReq_misses_0 973 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 3506472 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_latency_0 3506472 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_misses 970 # number of ReadReq misses +system.cpu.l2cache.ReadReq_misses_0 970 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 3496674 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency_0 3496674 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_miss_rate_0 1 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 973 # number of ReadReq MSHR misses -system.cpu.l2cache.ReadReq_mshr_misses_0 973 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses 970 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses_0 970 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. @@ -597,52 +597,52 @@ system.cpu.l2cache.blocked_no_targets 0 # nu system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 973 # number of demand (read+write) accesses -system.cpu.l2cache.demand_accesses_0 973 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses 970 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses_0 970 # number of demand (read+write) accesses system.cpu.l2cache.demand_accesses_1 0 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 6750.932169 # average overall miss latency -system.cpu.l2cache.demand_avg_miss_latency_0 6750.932169 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency 6748.795876 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency_0 6748.795876 # average overall miss latency system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 3603.773895 # average overall mshr miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency_0 3603.773895 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 3604.818557 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency_0 3604.818557 # average overall mshr miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits system.cpu.l2cache.demand_hits_0 0 # number of demand (read+write) hits system.cpu.l2cache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 6568657 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_latency_0 6568657 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 6546332 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency_0 6546332 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_miss_rate_0 1 # miss rate for demand accesses system.cpu.l2cache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses -system.cpu.l2cache.demand_misses 973 # number of demand (read+write) misses -system.cpu.l2cache.demand_misses_0 973 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses 970 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses_0 970 # number of demand (read+write) misses system.cpu.l2cache.demand_misses_1 0 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_hits_0 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 3506472 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_latency_0 3506472 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 3496674 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency_0 3496674 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_miss_rate_0 1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 973 # number of demand (read+write) MSHR misses -system.cpu.l2cache.demand_mshr_misses_0 973 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses 970 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses_0 970 # number of demand (read+write) MSHR misses system.cpu.l2cache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.l2cache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 973 # number of overall (read+write) accesses -system.cpu.l2cache.overall_accesses_0 973 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses 970 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses_0 970 # number of overall (read+write) accesses system.cpu.l2cache.overall_accesses_1 0 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 6750.932169 # average overall miss latency -system.cpu.l2cache.overall_avg_miss_latency_0 6750.932169 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency 6748.795876 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency_0 6748.795876 # average overall miss latency system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 3603.773895 # average overall mshr miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency_0 3603.773895 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 3604.818557 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency_0 3604.818557 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency @@ -650,26 +650,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> system.cpu.l2cache.overall_hits 0 # number of overall hits system.cpu.l2cache.overall_hits_0 0 # number of overall hits system.cpu.l2cache.overall_hits_1 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 6568657 # number of overall miss cycles -system.cpu.l2cache.overall_miss_latency_0 6568657 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 6546332 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency_0 6546332 # number of overall miss cycles system.cpu.l2cache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_miss_rate_0 1 # miss rate for overall accesses system.cpu.l2cache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses -system.cpu.l2cache.overall_misses 973 # number of overall misses -system.cpu.l2cache.overall_misses_0 973 # number of overall misses +system.cpu.l2cache.overall_misses 970 # number of overall misses +system.cpu.l2cache.overall_misses_0 970 # number of overall misses system.cpu.l2cache.overall_misses_1 0 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_hits_0 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 3506472 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_latency_0 3506472 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 3496674 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency_0 3496674 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_miss_rate_0 1 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 973 # number of overall MSHR misses -system.cpu.l2cache.overall_mshr_misses_0 973 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses 970 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses_0 970 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -689,35 +689,35 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.replacements_0 0 # number of replacements system.cpu.l2cache.replacements_1 0 # number of replacements -system.cpu.l2cache.sampled_refs 973 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 969 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.l2cache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.l2cache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 489.113488 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 487.752870 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.l2cache.writebacks_0 0 # number of writebacks system.cpu.l2cache.writebacks_1 0 # number of writebacks -system.cpu.numCycles 188950 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 74870 # Number of cycles rename is blocking +system.cpu.numCycles 189147 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 73147 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 8102 # Number of HB maps that are committed -system.cpu.rename.RENAME:IQFullEvents 21 # Number of times rename has blocked due to IQ full -system.cpu.rename.RENAME:IdleCycles 263382 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 2455 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:IQFullEvents 24 # Number of times rename has blocked due to IQ full +system.cpu.rename.RENAME:IdleCycles 265134 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 2520 # Number of times rename has blocked due to LSQ full system.cpu.rename.RENAME:ROBFullEvents 31 # Number of times rename has blocked due to ROB full -system.cpu.rename.RENAME:RenameLookups 72755 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 60875 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 44048 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 11047 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 5552 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 2536 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 35946 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 20340 # count of cycles rename stalled for serializing inst -system.cpu.rename.RENAME:serializingInsts 51 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 4990 # count of insts added to the skid buffer -system.cpu.rename.RENAME:tempSerializingInsts 38 # count of temporary serializing insts renamed -system.cpu.timesIdled 690 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.rename.RENAME:RenameLookups 74254 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 61970 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 45003 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 11202 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 5733 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 2584 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 36901 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 20319 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 49 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 5114 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 37 # count of temporary serializing insts renamed +system.cpu.timesIdled 691 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload0.PROG:num_syscalls 17 # Number of system calls system.cpu.workload1.PROG:num_syscalls 17 # Number of system calls diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr index c36de0b79..d8ccd6207 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr @@ -1,3 +1,5 @@ -0: system.remote_gdb.listener: listening for remote gdb on port 7000 -0: system.remote_gdb.listener: listening for remote gdb on port 7001 +0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 +0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001 warn: Entering event queue @ 0. Starting simulation... +warn: Increasing stack size by one page. +warn: Increasing stack size by one page. diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout index f07a960f8..30a45522d 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout @@ -7,8 +7,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jan 22 2007 23:06:52 -M5 started Mon Jan 22 23:07:23 2007 -M5 executing on ewok -command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing -Exiting @ tick 2237162 because target called exit() +M5 compiled Mar 24 2007 13:51:02 +M5 started Sat Mar 24 13:51:16 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 2232164 because target called exit() |