From c7f1cf1d58cf50118c18b1afc4c938eafba81492 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 13 Apr 2007 13:59:31 +0000 Subject: Remove most of the special handling for delay slots since they have to be squashed anyway on a mispredict. This is because the NNPC value they saw when executing was incorrect. --HG-- extra : convert_revision : b42c4eb28b4fbba66c65cbd0a5033bf886c1532d --- src/cpu/base_dyn_inst.hh | 2 ++ src/cpu/o3/comm.hh | 5 --- src/cpu/o3/commit_impl.hh | 75 ------------------------------------------- src/cpu/o3/cpu.cc | 12 ++----- src/cpu/o3/cpu.hh | 3 +- src/cpu/o3/decode_impl.hh | 65 +------------------------------------ src/cpu/o3/fetch.hh | 3 +- src/cpu/o3/fetch_impl.hh | 33 +++---------------- src/cpu/o3/iew.hh | 3 -- src/cpu/o3/iew_impl.hh | 74 ++++-------------------------------------- src/cpu/o3/inst_queue_impl.hh | 4 --- src/cpu/o3/rename_impl.hh | 49 ++-------------------------- 12 files changed, 20 insertions(+), 308 deletions(-) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index eed05c2f1..b02038b3e 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -709,7 +709,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ void setNextNPC(uint64_t val) { +#if ISA_HAS_DELAY_SLOT nextNPC = val; +#endif } /** Sets the ASID. 
*/ diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index d96919007..8d7bb95f4 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -87,7 +87,6 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; - bool squashDelaySlot[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; uint64_t nextNPC[Impl::MaxThreads]; @@ -114,7 +113,6 @@ struct TimeBufStruct { uint64_t branchAddr; InstSeqNum doneSeqNum; - InstSeqNum bdelayDoneSeqNum; // @todo: Might want to package this kind of branch stuff into a single // struct as it is used pretty frequently. @@ -169,9 +167,6 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; - InstSeqNum bdelayDoneSeqNum; - bool squashDelaySlot; - //Just in case we want to do a commit/squash on a cycle //(necessary for multiple ROBs?) bool commitInsts; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 65625065d..9dd5ed291 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -741,38 +741,15 @@ DefaultCommit::commit() // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; -#if ISA_HAS_DELAY_SLOT - InstSeqNum bdelay_done_seq_num = squashed_inst; - bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid]; - bool branchMispredict = fromIEW->branchMispredict[tid]; - - // Squashing/not squashing the branch delay slot only makes - // sense when you're squashing from a branch, ie from a branch - // mispredict. - if (branchMispredict && !squash_bdelay_slot) { - bdelay_done_seq_num++; - } -#endif - if (fromIEW->includeSquashInst[tid] == true) { squashed_inst--; -#if ISA_HAS_DELAY_SLOT - bdelay_done_seq_num--; -#endif } // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. 
youngestSeqNum[tid] = squashed_inst; -#if ISA_HAS_DELAY_SLOT - rob->squash(bdelay_done_seq_num, tid); - toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot; - toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num; -#else rob->squash(squashed_inst, tid); - toIEW->commitInfo[tid].squashDelaySlot = true; -#endif changedROBNumEntries[tid] = true; toIEW->commitInfo[tid].doneSeqNum = squashed_inst; @@ -809,10 +786,6 @@ DefaultCommit::commit() // Try to commit any instructions. commitInsts(); - } else { -#if ISA_HAS_DELAY_SLOT - skidInsert(); -#endif } //Check for any activity @@ -1164,37 +1137,13 @@ DefaultCommit::getInsts() { DPRINTF(Commit, "Getting instructions from Rename stage.\n"); -#if ISA_HAS_DELAY_SLOT - // Read any renamed instructions and place them into the ROB. - int insts_to_process = std::min((int)renameWidth, - (int)(fromRename->size + skidBuffer.size())); - int rename_idx = 0; - - DPRINTF(Commit, "%i insts available to process. Rename Insts:%i " - "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size, - skidBuffer.size()); -#else // Read any renamed instructions and place them into the ROB. 
int insts_to_process = std::min((int)renameWidth, fromRename->size); -#endif - for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) { DynInstPtr inst; -#if ISA_HAS_DELAY_SLOT - // Get insts from skidBuffer or from Rename - if (skidBuffer.size() > 0) { - DPRINTF(Commit, "Grabbing skidbuffer inst.\n"); - inst = skidBuffer.front(); - skidBuffer.pop(); - } else { - DPRINTF(Commit, "Grabbing rename inst.\n"); - inst = fromRename->insts[rename_idx++]; - } -#else inst = fromRename->insts[inst_num]; -#endif int tid = inst->threadNumber; if (!inst->isSquashed() && @@ -1216,30 +1165,6 @@ DefaultCommit::getInsts() inst->readPC(), inst->seqNum, tid); } } - -#if ISA_HAS_DELAY_SLOT - if (rename_idx < fromRename->size) { - DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n"); - - for (; - rename_idx < fromRename->size; - rename_idx++) { - DynInstPtr inst = fromRename->insts[rename_idx]; - - if (!inst->isSquashed()) { - DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", - "skidBuffer.\n", inst->readPC(), inst->seqNum, - inst->threadNumber); - skidBuffer.push(inst); - } else { - DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " - "squashed, skipping.\n", - inst->readPC(), inst->seqNum, inst->threadNumber); - } - } - } -#endif - } template diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 2e6a43f9c..b2b4645d2 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -696,7 +696,7 @@ FullO3CPU::removeThread(unsigned tid) // Squash Throughout Pipeline InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid); + fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); @@ -1226,9 +1226,7 @@ FullO3CPU::removeFrontInst(DynInstPtr &inst) template void -FullO3CPU::removeInstsNotInROB(unsigned tid, - bool squash_delay_slot, - const InstSeqNum &delay_slot_seq_num) 
+FullO3CPU::removeInstsNotInROB(unsigned tid) { DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); @@ -1259,12 +1257,6 @@ FullO3CPU::removeInstsNotInROB(unsigned tid, while (inst_it != end_it) { assert(!instList.empty()); -#if ISA_HAS_DELAY_SLOT - if(!squash_delay_slot && - delay_slot_seq_num >= (*inst_it)->seqNum) { - break; - } -#endif squashInstIt(inst_it, tid); inst_it--; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index e71d05c8e..4b247e6e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -468,8 +468,7 @@ class FullO3CPU : public BaseO3CPU /** Remove all instructions that are not currently in the ROB. * There's also an option to not squash delay slot instructions.*/ - void removeInstsNotInROB(unsigned tid, bool squash_delay_slot, - const InstSeqNum &delay_slot_seq_num); + void removeInstsNotInROB(unsigned tid); /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 314864f94..c9d0a1885 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -49,8 +49,6 @@ DefaultDecode::DefaultDecode(O3CPU *_cpu, Params *params) stalls[i].rename = false; stalls[i].iew = false; stalls[i].commit = false; - - squashAfterDelaySlot[i] = false; } // @todo: Make into a parameter @@ -278,17 +276,12 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) #if ISA_HAS_DELAY_SLOT toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); - - toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid]; - squashAfterDelaySlot[tid] = false; - - InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid]; #else toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); +#endif InstSeqNum squash_seq_num = inst->seqNum; -#endif // Might have to tell fetch to unblock. 
if (decodeStatus[tid] == Blocked || @@ -309,30 +302,10 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) // Clear the instruction list and skid buffer in case they have any // insts in them. while (!insts[tid].empty()) { - -#if ISA_HAS_DELAY_SLOT - if (insts[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode " - "instructions before delay slot [sn:%i]. %i insts" - "left in decode.\n", tid, squash_seq_num, - insts[tid].size()); - break; - } -#endif insts[tid].pop(); } while (!skidBuffer[tid].empty()) { - -#if ISA_HAS_DELAY_SLOT - if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer " - "instructions before delay slot [sn:%i]. %i insts" - "left in decode.\n", tid, squash_seq_num, - insts[tid].size()); - break; - } -#endif skidBuffer[tid].pop(); } @@ -760,48 +733,12 @@ DefaultDecode::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end -#if !ISA_HAS_DELAY_SLOT squash(inst, inst->threadNumber); Addr target = inst->branchTarget(); inst->setPredTarg(target, target + sizeof(TheISA::MachInst)); break; -#else - // If mispredicted as taken, then ignore delay slot - // instruction... else keep delay slot and squash - // after it is sent to rename - if (inst->readPredTaken() && inst->isCondDelaySlot()) { - DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst." - "[sn:%i] PC %#x mispredicted as taken.\n", tid, - inst->seqNum, inst->PC); - bdelayDoneSeqNum[tid] = inst->seqNum; - squash(inst, inst->threadNumber); - Addr target = inst->branchTarget(); - inst->setPredTarg(target, - target + sizeof(TheISA::MachInst)); - break; - } else { - DPRINTF(Decode, "[tid:%i]: Misprediction detected at " - "[sn:%i] PC %#x, will squash after delay slot " - "inst. 
is sent to Rename\n", - tid, inst->seqNum, inst->PC); - bdelayDoneSeqNum[tid] = inst->seqNum + 1; - squashAfterDelaySlot[tid] = true; - squashInst[tid] = inst; - continue; - } -#endif } } - - if (squashAfterDelaySlot[tid]) { - assert(!inst->isSquashed()); - squash(squashInst[tid], squashInst[tid]->threadNumber); - Addr target = squashInst[tid]->branchTarget(); - squashInst[tid]->setPredTarg(target, - target + sizeof(TheISA::MachInst)); - assert(!inst->isSquashed()); - break; - } } // If we didn't process all instructions, then we will need to block diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 241935416..bb0057e7c 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -263,8 +263,7 @@ class DefaultFetch * squash should be the commit stage. */ void squash(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - bool squash_delay_slot, unsigned tid); + const InstSeqNum &seq_num, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index e16f97558..25498c7f3 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -774,20 +774,14 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - bool squash_delay_slot, unsigned tid) + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, new_NPC, tid); -#if ISA_HAS_DELAY_SLOT - // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); -#else // Tell the CPU to remove any instructions that are not in the ROB. 
- cpu->removeInstsNotInROB(tid, true, 0); -#endif + cpu->removeInstsNotInROB(tid); } template @@ -896,17 +890,10 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); - -#if ISA_HAS_DELAY_SLOT - InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; -#endif // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, - doneSeqNum, - fromCommit->commitInfo[tid].squashDelaySlot, + fromCommit->commitInfo[tid].doneSeqNum, tid); // Also check if there's a mispredict that happened. @@ -955,18 +942,13 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) if (fetchStatus[tid] != Squashing) { -#if ISA_HAS_DELAY_SLOT - InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; -#endif DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n", fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC); // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, - doneSeqNum, + fromDecode->decodeInfo[tid].doneSeqNum, tid); return true; @@ -1157,9 +1139,6 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots -#if !ISA_HAS_DELAY_SLOT -// predicted_branch |= -#endif lookupAndUpdateNextPC(instruction, next_PC, next_NPC); predicted_branch |= (next_PC != fetch_NPC); @@ -1213,11 +1192,7 @@ DefaultFetch::fetch(bool &status_change) PC[tid] = next_PC; nextPC[tid] = next_NPC; nextNPC[tid] = next_NPC + instSize; -#if ISA_HAS_DELAY_SLOT - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); -#else DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); -#endif } else { // We shouldn't be in an icache 
miss and also have a fault (an ITB // miss) diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index ce2991cfb..eef5a15d2 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -402,9 +402,6 @@ class DefaultIEW /** Records if there is a fetch redirect on this cycle for each thread. */ bool fetchRedirect[Impl::MaxThreads]; - /** Keeps track of the last valid branch delay slot instss for threads */ - InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads]; - /** Used to track if all instructions have been dispatched this cycle. * If they have not, then blocking must have occurred, and the instructions * would already be added to the skid buffer. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 62e656e93..050785818 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -69,7 +69,6 @@ DefaultIEW::DefaultIEW(O3CPU *_cpu, Params *params) dispatchStatus[i] = Running; stalls[i].commit = false; fetchRedirect[i] = false; - bdelayDoneSeqNum[i] = 0; } wbMax = wbWidth * params->wbDepth; @@ -410,31 +409,14 @@ DefaultIEW::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. -#if ISA_HAS_DELAY_SLOT - ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid); -#else ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); -#endif updatedQueues = true; // Clear the skid buffer in case it has any data in it. 
DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n", - tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum); + tid, fromCommit->commitInfo[tid].doneSeqNum); while (!skidBuffer[tid].empty()) { -#if ISA_HAS_DELAY_SLOT - if (skidBuffer[tid].front()->seqNum <= - fromCommit->commitInfo[tid].bdelayDoneSeqNum) { - DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions " - "that occur before delay slot [sn:%i].\n", - fromCommit->commitInfo[tid].bdelayDoneSeqNum, - tid); - break; - } else { - DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from " - "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum); - } -#endif if (skidBuffer[tid].front()->isLoad() || skidBuffer[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -445,8 +427,6 @@ DefaultIEW::squash(unsigned tid) skidBuffer[tid].pop(); } - bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; - emptyRenameInsts(tid); } @@ -462,38 +442,18 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->mispredPC[tid] = inst->readPC(); toCommit->branchMispredict[tid] = true; - int instSize = sizeof(TheISA::MachInst); #if ISA_HAS_DELAY_SLOT - bool branch_taken = + int instSize = sizeof(TheISA::MachInst); + toCommit->branchTaken[tid] = !(inst->readNextPC() + instSize == inst->readNextNPC() && (inst->readNextPC() == inst->readPC() + instSize || inst->readNextPC() == inst->readPC() + 2 * instSize)); - DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n", - branch_taken ? "true": "false", inst->seqNum); - - toCommit->branchTaken[tid] = branch_taken; - - bool squashDelaySlot = true; -// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst)); - DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n", - squashDelaySlot ? "true": "false", inst->seqNum); - toCommit->squashDelaySlot[tid] = squashDelaySlot; - //If we're squashing the delay slot, we need to pick back up at NextPC. 
- //Otherwise, NextPC isn't being squashed, so we should pick back up at - //NextNPC. - if (squashDelaySlot) { - toCommit->nextPC[tid] = inst->readNextPC(); - toCommit->nextNPC[tid] = inst->readNextNPC(); - } else { - toCommit->nextPC[tid] = inst->readNextNPC(); - toCommit->nextNPC[tid] = inst->readNextNPC() + instSize; - } #else toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); - toCommit->nextPC[tid] = inst->readNextPC(); - toCommit->nextNPC[tid] = inst->readNextPC() + instSize; #endif + toCommit->nextPC[tid] = inst->readNextPC(); + toCommit->nextNPC[tid] = inst->readNextNPC(); toCommit->includeSquashInst[tid] = false; @@ -510,11 +470,7 @@ DefaultIEW::squashDueToMemOrder(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readNextPC(); -#if ISA_HAS_DELAY_SLOT toCommit->nextNPC[tid] = inst->readNextNPC(); -#else - toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst); -#endif toCommit->branchMispredict[tid] = false; toCommit->includeSquashInst[tid] = false; @@ -532,11 +488,7 @@ DefaultIEW::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readPC(); -#if ISA_HAS_DELAY_SLOT toCommit->nextNPC[tid] = inst->readNextPC(); -#else - toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst); -#endif toCommit->branchMispredict[tid] = false; // Must include the broadcasted SN in the squash. 
@@ -880,10 +832,8 @@ DefaultIEW::sortInsts() { int insts_from_rename = fromRename->size; #ifdef DEBUG -#if !ISA_HAS_DELAY_SLOT for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); -#endif #endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); @@ -894,21 +844,9 @@ template void DefaultIEW::emptyRenameInsts(unsigned tid) { - DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until " - "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]); + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions\n", tid); while (!insts[tid].empty()) { -#if ISA_HAS_DELAY_SLOT - if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) { - DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction" - " that occurs at or before delay slot [sn:%i].\n", - tid, bdelayDoneSeqNum[tid]); - break; - } else { - DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction " - "[sn:%i].\n", tid, insts[tid].front()->seqNum); - } -#endif if (insts[tid].front()->isLoad() || insts[tid].front()->isStore() ) { diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 10c3287f2..bdf5f07aa 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1005,11 +1005,7 @@ InstructionQueue::squash(unsigned tid) // Read instruction sequence number of last instruction out of the // time buffer. -#if ISA_HAS_DELAY_SLOT - squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; -#endif // Call doSquash if there are insts in the IQ if (count[tid] > 0) { diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 431705e19..6e7180b1e 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -356,47 +356,12 @@ DefaultRename::squash(const InstSeqNum &squash_seq_num, unsigned tid) } // Clear the instruction list and skid buffer in case they have any - // insts in them. 
Since we support multiple ISAs, we cant just: - // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is - // a possible delay slot inst for different architectures - // insts[tid].clear(); -#if ISA_HAS_DELAY_SLOT - DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until " - "[sn:%i].\n",tid, squash_seq_num); - ListIt ilist_it = insts[tid].begin(); - while (ilist_it != insts[tid].end()) { - if ((*ilist_it)->seqNum > squash_seq_num) { - (*ilist_it)->setSquashed(); - DPRINTF(Rename, "Squashing incoming decode instruction, " - "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC); - } - ilist_it++; - } -#else + // insts in them. insts[tid].clear(); -#endif // Clear the skid buffer in case it has any data in it. - // See comments above. - // skidBuffer[tid].clear(); -#if ISA_HAS_DELAY_SLOT - DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions " - "until [sn:%i].\n", tid, squash_seq_num); - ListIt slist_it = skidBuffer[tid].begin(); - while (slist_it != skidBuffer[tid].end()) { - if ((*slist_it)->seqNum > squash_seq_num) { - (*slist_it)->setSquashed(); - DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]" - "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC); - } - slist_it++; - } - resumeUnblocking = (skidBuffer[tid].size() != 0); - DPRINTF(Rename, "Resume unblocking set to %s\n", - resumeUnblocking ? 
"true" : "false"); -#else skidBuffer[tid].clear(); -#endif + doSquash(squash_seq_num, tid); } @@ -776,10 +741,8 @@ DefaultRename::sortInsts() { int insts_from_decode = fromDecode->size; #ifdef DEBUG -#if !ISA_HAS_DELAY_SLOT for (int i=0; i < numThreads; i++) assert(insts[i].empty()); -#endif #endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; @@ -1248,13 +1211,7 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " "commit.\n", tid); -#if ISA_HAS_DELAY_SLOT - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; -#endif - - squash(squashed_seq_num, tid); + squash(fromCommit->commitInfo[tid].doneSeqNum, tid); return true; } -- cgit v1.2.3 From e9c6012acf729ef55b37dda76e011b5a284b6988 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 13 Apr 2007 14:00:42 +0000 Subject: Adjust references to reflect differences without special delay slot handling. Performance actually went up slightly. --HG-- extra : convert_revision : 504f6185ddc89881aa41deb7fd934da8038d1ed2 --- .../ref/sparc/linux/o3-timing/m5stats.txt | 263 ++++++++++----------- .../02.insttest/ref/sparc/linux/o3-timing/stderr | 1 - .../02.insttest/ref/sparc/linux/o3-timing/stdout | 8 +- 3 files changed, 135 insertions(+), 137 deletions(-) diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt index 7c0d31494..4c5655a33 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt @@ -1,17 +1,17 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. 
-global.BPredUnit.BTBHits 2990 # Number of BTB hits -global.BPredUnit.BTBLookups 7055 # Number of BTB lookups +global.BPredUnit.BTBHits 3021 # Number of BTB hits +global.BPredUnit.BTBLookups 7086 # Number of BTB lookups global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions. global.BPredUnit.condIncorrect 2077 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 7846 # Number of conditional branches predicted -global.BPredUnit.lookups 7846 # Number of BP lookups +global.BPredUnit.condPredicted 7877 # Number of conditional branches predicted +global.BPredUnit.lookups 7877 # Number of BP lookups global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target. -host_inst_rate 15119 # Simulator instruction rate (inst/s) -host_mem_usage 154868 # Number of bytes of host memory used -host_seconds 0.73 # Real time elapsed on the host -host_tick_rate 1956796 # Simulator tick rate (ticks/s) +host_inst_rate 4388 # Simulator instruction rate (inst/s) +host_mem_usage 179936 # Number of bytes of host memory used +host_seconds 2.50 # Real time elapsed on the host +host_tick_rate 568121 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 12 # Number of conflicting loads. memdepunit.memDep.conflictingStores 0 # Number of conflicting stores. memdepunit.memDep.insertedLoads 3250 # Number of loads inserted to the mem dependence unit. 
@@ -19,22 +19,22 @@ memdepunit.memDep.insertedStores 2817 # Nu sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 10976 # Number of instructions simulated sim_seconds 0.000001 # Number of seconds simulated -sim_ticks 1421211 # Number of ticks simulated +sim_ticks 1421207 # Number of ticks simulated system.cpu.commit.COM:branches 2152 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 172 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 225 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 221349 +system.cpu.commit.COM:committed_per_cycle.samples 220766 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 215844 9751.30% - 1 2970 134.18% - 2 1290 58.28% - 3 631 28.51% - 4 208 9.40% - 5 90 4.07% - 6 133 6.01% + 0 215368 9755.49% + 1 2915 132.04% + 2 1196 54.18% + 3 673 30.48% + 4 208 9.42% + 5 79 3.58% + 6 91 4.12% 7 11 0.50% - 8 172 7.77% + 8 225 10.19% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -49,65 +49,65 @@ system.cpu.commit.commitNonSpecStalls 327 # Th system.cpu.commit.commitSquashedInsts 14263 # The number of squashed insts skipped by commit system.cpu.committedInsts 10976 # Number of Instructions Simulated system.cpu.committedInsts_total 10976 # Number of Instructions Simulated -system.cpu.cpi 129.483509 # CPI: Cycles Per Instruction -system.cpu.cpi_total 129.483509 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 2737 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 6585.044776 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6511.939394 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 2603 # number of 
ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 882396 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.048959 # miss rate for ReadReq accesses +system.cpu.cpi 129.483145 # CPI: Cycles Per Instruction +system.cpu.cpi_total 129.483145 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2738 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 6586.074627 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6513.166667 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2604 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 882534 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.048941 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 134 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 68 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 429788 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.024114 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_latency 429869 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.024105 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 66 # number of ReadReq MSHR misses system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses) system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 7960.583924 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7136.918605 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 7962.583924 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7138.593023 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 869 # number of WriteReq 
hits -system.cpu.dcache.WriteReq_miss_latency 3367327 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 3368173 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.327399 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 423 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 337 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 613775 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 613919 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.066563 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 86 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 22.881579 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 22.888158 # Average number of references to valid blocks. 
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 4029 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 7629.664273 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency -system.cpu.dcache.demand_hits 3472 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 4249723 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.138248 # miss rate for demand accesses +system.cpu.dcache.demand_accesses 4030 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 7631.430880 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 6867.026316 # average overall mshr miss latency +system.cpu.dcache.demand_hits 3473 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 4250707 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.138213 # miss rate for demand accesses system.cpu.dcache.demand_misses 557 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 405 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 1043563 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.037726 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_latency 1043788 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.037717 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 152 # number of demand (read+write) 
MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 4029 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 7629.664273 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 4030 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 7631.430880 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 6867.026316 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 3472 # number of overall hits -system.cpu.dcache.overall_miss_latency 4249723 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.138248 # miss rate for overall accesses +system.cpu.dcache.overall_hits 3473 # number of overall hits +system.cpu.dcache.overall_miss_latency 4250707 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.138213 # miss rate for overall accesses system.cpu.dcache.overall_misses 557 # number of overall misses system.cpu.dcache.overall_mshr_hits 405 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 1043563 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.037726 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_latency 1043788 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.037717 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 152 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles 
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -123,50 +123,50 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 152 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 90.938737 # Cycle average of tags in use -system.cpu.dcache.total_refs 3478 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 90.938565 # Cycle average of tags in use +system.cpu.dcache.total_refs 3479 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 192719 # Number of cycles decode is blocked -system.cpu.decode.DECODE:DecodedInsts 39774 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 20128 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 8238 # Number of cycles decode is running +system.cpu.decode.DECODE:BlockedCycles 192302 # Number of cycles decode is blocked +system.cpu.decode.DECODE:DecodedInsts 39763 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 19973 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 8441 # Number of cycles decode is running system.cpu.decode.DECODE:SquashCycles 3162 # Number of cycles decode is squashing -system.cpu.decode.DECODE:UnblockCycles 264 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 7846 # Number of branches that fetch encountered +system.cpu.decode.DECODE:UnblockCycles 50 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 7877 # Number of branches that fetch encountered system.cpu.fetch.CacheLines 5085 # Number of cache lines fetched -system.cpu.fetch.Cycles 
14399 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.Cycles 14430 # Number of cycles fetch has run and was not squashing or blocked system.cpu.fetch.IcacheSquashes 745 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 43304 # Number of instructions fetch has processed +system.cpu.fetch.Insts 43366 # Number of instructions fetch has processed system.cpu.fetch.SquashCycles 2134 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.034947 # Number of branch fetches per cycle +system.cpu.fetch.branchRate 0.035176 # Number of branch fetches per cycle system.cpu.fetch.icacheStallCycles 5085 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 2990 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 0.192881 # Number of inst fetches per cycle +system.cpu.fetch.predictedBranches 3021 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 0.193660 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 224511 +system.cpu.fetch.rateDist.samples 223928 system.cpu.fetch.rateDist.min_value 0 - 0 215198 9585.19% - 1 2258 100.57% - 2 627 27.93% - 3 958 42.67% - 4 553 24.63% - 5 816 36.35% - 6 951 42.36% - 7 280 12.47% - 8 2870 127.83% + 0 214584 9582.72% + 1 2258 100.84% + 2 658 29.38% + 3 958 42.78% + 4 553 24.70% + 5 816 36.44% + 6 951 42.47% + 7 280 12.50% + 8 2870 128.17% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist system.cpu.icache.ReadReq_accesses 5085 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5148.266776 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4502.972752 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_miss_latency 5150.152209 # average ReadReq miss latency 
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 4503.673025 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 4474 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 3145591 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 3146743 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.120157 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 611 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 244 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1652591 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 1652848 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.072173 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 367 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -178,29 +178,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 5085 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5148.266776 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency +system.cpu.icache.demand_avg_miss_latency 5150.152209 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4503.673025 # average overall mshr miss latency system.cpu.icache.demand_hits 4474 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 3145591 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 3146743 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.120157 # miss rate for demand accesses system.cpu.icache.demand_misses 
611 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 244 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1652591 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 1652848 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.072173 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 367 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.icache.overall_accesses 5085 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5148.266776 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency +system.cpu.icache.overall_avg_miss_latency 5150.152209 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4503.673025 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.icache.overall_hits 4474 # number of overall hits -system.cpu.icache.overall_miss_latency 3145591 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 3146743 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.120157 # miss rate for overall accesses system.cpu.icache.overall_misses 611 # number of overall misses system.cpu.icache.overall_mshr_hits 244 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1652591 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 1652848 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.072173 # mshr miss rate for overall accesses 
system.cpu.icache.overall_mshr_misses 367 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -217,35 +217,35 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 1 # number of replacements system.cpu.icache.sampled_refs 363 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 172.869174 # Cycle average of tags in use +system.cpu.icache.tagsinuse 172.868641 # Cycle average of tags in use system.cpu.icache.total_refs 4474 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 1196701 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 3576 # Number of branches executed +system.cpu.idleCycles 1197280 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 3577 # Number of branches executed system.cpu.iew.EXEC:nop 0 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.092548 # Inst execution rate -system.cpu.iew.EXEC:refs 5257 # number of memory reference insts executed +system.cpu.iew.EXEC:rate 0.092802 # Inst execution rate +system.cpu.iew.EXEC:refs 5258 # number of memory reference insts executed system.cpu.iew.EXEC:stores 2386 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed system.cpu.iew.WB:consumers 9737 # num instructions consuming a value -system.cpu.iew.WB:count 19769 # cumulative count of insts written-back +system.cpu.iew.WB:count 19771 # cumulative count of insts written-back system.cpu.iew.WB:fanout 0.790901 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ 
system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:producers 7701 # num instructions producing a value -system.cpu.iew.WB:rate 0.088054 # insts written-back per cycle -system.cpu.iew.WB:sent 20061 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 2593 # Number of branch mispredicts detected at execute +system.cpu.iew.WB:rate 0.088292 # insts written-back per cycle +system.cpu.iew.WB:sent 20063 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 2594 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 476 # Number of cycles IEW is blocking system.cpu.iew.iewDispLoadInsts 3250 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 617 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 2705 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispSquashedInsts 2694 # Number of squashed instructions skipped by dispatch system.cpu.iew.iewDispStoreInsts 2817 # Number of dispatched store instructions system.cpu.iew.iewDispatchedInsts 25240 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 2871 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 1780 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 20778 # Number of executed instructions +system.cpu.iew.iewExecLoadInsts 2872 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 1777 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 20781 # Number of executed instructions system.cpu.iew.iewIQFullEvents 7 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall @@ -262,7 +262,7 @@ 
system.cpu.iew.lsq.thread.0.rescheduledLoads 0 system.cpu.iew.lsq.thread.0.squashedLoads 1788 # Number of loads squashed system.cpu.iew.lsq.thread.0.squashedStores 1519 # Number of stores squashed system.cpu.iew.memOrderViolationEvents 54 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 962 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedNotTakenIncorrect 963 # Number of branches that were predicted not taken incorrectly system.cpu.iew.predictedTakenIncorrect 1631 # Number of branches that were predicted taken incorrectly system.cpu.ipc 0.007723 # IPC: Instructions Per Cycle system.cpu.ipc_total 0.007723 # IPC: Total IPC of All Threads @@ -302,21 +302,21 @@ system.cpu.iq.ISSUE:fu_full.start_dist InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 224511 +system.cpu.iq.ISSUE:issued_per_cycle.samples 223928 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 215315 9590.40% - 1 4124 183.69% - 2 1297 57.77% - 3 1306 58.17% - 4 1190 53.00% - 5 707 31.49% - 6 433 19.29% - 7 83 3.70% - 8 56 2.49% + 0 214838 9594.07% + 1 3976 177.56% + 2 1244 55.55% + 3 1359 60.69% + 4 1316 58.77% + 5 612 27.33% + 6 444 19.83% + 7 83 3.71% + 8 56 2.50% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.100476 # Inst issue rate +system.cpu.iq.ISSUE:rate 0.100738 # Inst issue rate system.cpu.iq.iqInstsAdded 24623 # Number of instructions added to the IQ (excludes non-spec) system.cpu.iq.iqInstsIssued 22558 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 617 # Number of non-speculative instructions added to the IQ @@ -325,12 +325,12 @@ system.cpu.iq.iqSquashedInstsIssued 174 # Nu system.cpu.iq.iqSquashedNonSpecRemoved 290 # Number of squashed non-spec 
instructions that were removed system.cpu.iq.iqSquashedOperandsExamined 5834 # Number of squashed operands that are examined and possibly removed from graph system.cpu.l2cache.ReadReq_accesses 513 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4754.779727 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.506823 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 2439202 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_avg_miss_latency 4755.715400 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.752437 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 2439682 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 513 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1202219 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 1202345 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 513 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -342,29 +342,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 # system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 513 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4754.779727 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 4755.715400 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.752437 # average overall 
mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 2439202 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 2439682 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 513 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 1202219 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 1202345 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 513 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 513 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4754.779727 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 4755.715400 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.752437 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 2439202 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 2439682 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 513 # number of overall 
misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 1202219 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 1202345 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 513 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -381,28 +381,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 512 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 262.946375 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 262.945674 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 224511 # number of cpu cycles simulated +system.cpu.numCycles 223928 # number of cpu cycles simulated system.cpu.rename.RENAME:BlockCycles 960 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 9868 # Number of HB maps that are committed system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full -system.cpu.rename.RENAME:IdleCycles 20098 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 481 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:IdleCycles 21302 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 411 # Number of times rename has blocked due to LSQ full system.cpu.rename.RENAME:ROBFullEvents 4 # Number of times rename has blocked due to ROB full system.cpu.rename.RENAME:RenameLookups 46931 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 31260 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedInsts 31249 # Number of instructions processed by rename system.cpu.rename.RENAME:RenamedOperands 25831 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 7921 # Number of cycles rename is running +system.cpu.rename.RENAME:RunCycles 7136 # Number of cycles rename is running system.cpu.rename.RENAME:SquashCycles 3162 # Number of cycles rename is squashing -system.cpu.rename.RENAME:SquashedInsts 8042 # Number of squashed instructions processed by rename -system.cpu.rename.RENAME:UnblockCycles 1212 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UnblockCycles 614 # Number of cycles rename is unblocking system.cpu.rename.RENAME:UndoneMaps 15963 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 190573 # count of cycles rename stalled for serializing inst 
+system.cpu.rename.RENAME:serializeStallCycles 190754 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 638 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 5594 # count of insts added to the skid buffer +system.cpu.rename.RENAME:skidInsts 5529 # count of insts added to the skid buffer system.cpu.rename.RENAME:tempSerializingInsts 629 # count of temporary serializing insts renamed system.cpu.timesIdled 289 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 8 # Number of system calls diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr index 48affb0e2..7873672f2 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr @@ -1,4 +1,3 @@ warn: More than two loadable segments in ELF object. warn: Ignoring segment @ 0x0 length 0x0. -0: system.remote_gdb.listener: listening for remote gdb on port 7003 warn: Entering event queue @ 0. Starting simulation... 
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout index 6cba2ba7e..38b0c1787 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout @@ -16,9 +16,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Apr 9 2007 03:06:26 -M5 started Mon Apr 9 03:06:54 2007 -M5 executing on zizzer.eecs.umich.edu +M5 compiled Apr 13 2007 13:56:34 +M5 started Fri Apr 13 13:56:35 2007 +M5 executing on ahchoo.blinky.homelinux.org command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 1421211 because target called exit() +Exiting @ tick 1421207 because target called exit() -- cgit v1.2.3 From 3140dd88bc588ea51aadeb2dd58d33cc9a40883a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 14 Apr 2007 17:07:24 +0000 Subject: Make the fsr a serializing register. Other control registers probably need this as well. 
--HG-- extra : convert_revision : edd3f9a83cc2722b6e0eff0eff4a8e034b0f6ec6 --- src/arch/sparc/isa/operands.isa | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa index 58d616a7a..110b37d15 100644 --- a/src/arch/sparc/isa/operands.isa +++ b/src/arch/sparc/isa/operands.isa @@ -187,7 +187,7 @@ def operands {{ 'Hver': ('ControlReg', 'udw', 'MISCREG_HVER', None, 74), 'StrandStsReg': ('ControlReg', 'udw', 'MISCREG_STRAND_STS_REG', None, 75), - 'Fsr': ('ControlReg', 'udw', 'MISCREG_FSR', None, 80), + 'Fsr': ('ControlReg', 'udw', 'MISCREG_FSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 80), # Mem gets a large number so it's always last 'Mem': ('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100) -- cgit v1.2.3 From 5a3dcc172a9fd661330909815b163eb6f4d6a2d8 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 14 Apr 2007 17:08:24 +0000 Subject: Make register indexes larger so they can actually hold all the legal values. Oops! --HG-- extra : convert_revision : 7689b2e1f7468e4acb8be0f242f74002c79e7960 --- src/arch/sparc/types.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/sparc/types.hh b/src/arch/sparc/types.hh index 15386adca..8bd50b7e8 100644 --- a/src/arch/sparc/types.hh +++ b/src/arch/sparc/types.hh @@ -59,7 +59,7 @@ namespace SparcISA typedef int RegContextVal; - typedef uint8_t RegIndex; + typedef uint16_t RegIndex; } #endif -- cgit v1.2.3 From c3081d9c1c36e1a08c173048783d191fa19463de Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 14 Apr 2007 17:13:18 +0000 Subject: Add support for microcode and pull out the special branch delay slot handling. Branch delay slots need to be squash on a mispredict as well because the nnpc they saw was incorrect. 
--HG-- extra : convert_revision : 8b9c603616bcad254417a7a3fa3edfb4c8728719 --- src/cpu/base_dyn_inst.hh | 57 +++++++++++++++++++++--- src/cpu/base_dyn_inst_impl.hh | 55 +++++++++++++++++++++-- src/cpu/o3/comm.hh | 21 +++++---- src/cpu/o3/commit.hh | 34 ++++++++++++--- src/cpu/o3/commit_impl.hh | 11 ++--- src/cpu/o3/cpu.cc | 30 ++++++++++++- src/cpu/o3/cpu.hh | 22 +++++++--- src/cpu/o3/decode_impl.hh | 4 +- src/cpu/o3/fetch.hh | 17 ++++---- src/cpu/o3/fetch_impl.hh | 91 +++++++++++++++++++++++++-------------- src/cpu/o3/iew_impl.hh | 1 + src/cpu/o3/rename_impl.hh | 6 ++- src/cpu/o3/sparc/dyn_inst.hh | 10 ++++- src/cpu/o3/sparc/dyn_inst_impl.hh | 17 +++++++- 14 files changed, 291 insertions(+), 85 deletions(-) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index b02038b3e..1311e5cf2 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** PC of this instruction. */ Addr PC; + /** Micro PC of this instruction. */ + Addr microPC; + protected: /** Next non-speculative PC. It is not filled in at fetch, but rather * once the target of the branch is truly known (either decode or @@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Next non-speculative NPC. Target PC for Mips or Sparc. */ Addr nextNPC; + /** Next non-speculative micro PC. */ + Addr nextMicroPC; + /** Predicted next PC. */ Addr predPC; /** Predicted next NPC. */ Addr predNPC; + /** Predicted next microPC */ + Addr predMicroPC; + /** If this is a branch that was predicted taken */ bool predTaken; @@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted { _flatDestRegIdx[idx] = flattened_dest; } + /** BaseDynInst constructor given a binary instruction. + * @param staticInst A StaticInstPtr to the underlying instruction. + * @param PC The PC of the instruction. + * @param pred_PC The predicted next PC. + * @param pred_NPC The predicted next NPC. 
+ * @param seq_num The sequence number of the instruction. + * @param cpu Pointer to the instruction's CPU. + */ + BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a binary instruction. * @param inst The binary instruction. @@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param seq_num The sequence number of the instruction. * @param cpu Pointer to the instruction's CPU. */ - BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, - Addr pred_PC, Addr pred_NPC, + BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. @@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted #endif } + Addr readNextMicroPC() + { + return nextMicroPC; + } + /** Set the predicted target of this current instruction. */ - void setPredTarg(Addr predicted_PC, Addr predicted_NPC) + void setPredTarg(Addr predicted_PC, Addr predicted_NPC, + Addr predicted_MicroPC) { predPC = predicted_PC; predNPC = predicted_NPC; + predMicroPC = predicted_MicroPC; } /** Returns the predicted PC immediately after the branch. */ @@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the predicted PC two instructions after the branch */ Addr readPredNPC() { return predNPC; } + /** Returns the predicted micro PC after the branch */ + Addr readPredMicroPC() { return predMicroPC; } + /** Returns whether the instruction was predicted taken or not. 
*/ bool readPredTaken() { @@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted bool mispredicted() { return readPredPC() != readNextPC() || - readPredNPC() != readNextNPC(); + readPredNPC() != readNextNPC() || + readPredMicroPC() != readNextMicroPC(); } // @@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isQuiesce() const { return staticInst->isQuiesce(); } bool isIprAccess() const { return staticInst->isIprAccess(); } bool isUnverifiable() const { return staticInst->isUnverifiable(); } + bool isMacroOp() const { return staticInst->isMacroOp(); } + bool isMicroOp() const { return staticInst->isMicroOp(); } + bool isDelayedCommit() const { return staticInst->isDelayedCommit(); } + bool isLastMicroOp() const { return staticInst->isLastMicroOp(); } + bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); } + bool isMicroBranch() const { return staticInst->isMicroBranch(); } /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -700,20 +737,28 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } + /**Read the micro PC of this instruction. */ + const Addr readMicroPC() const { return microPC; } + /** Set the next PC of this instruction (its actual target). */ - void setNextPC(uint64_t val) + void setNextPC(Addr val) { nextPC = val; } /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ - void setNextNPC(uint64_t val) + void setNextNPC(Addr val) { #if ISA_HAS_DELAY_SLOT nextNPC = val; #endif } + void setNextMicroPC(Addr val) + { + nextMicroPC = val; + } + /** Sets the ASID. 
*/ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index a1c866336..acf8af9cf 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -62,19 +62,66 @@ my_hash_t thishash; #endif template -BaseDynInst::BaseDynInst(TheISA::ExtMachInst machInst, +BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu) + : staticInst(_staticInst), traceData(NULL), cpu(cpu) { seqNum = seq_num; + bool nextIsMicro = + staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + PC = inst_PC; - nextPC = inst_NPC; - nextNPC = nextPC + sizeof(TheISA::MachInst); + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } + predPC = pred_PC; + predNPC = pred_NPC; + predMicroPC = pred_MicroPC; + predTaken = false; + + initVars(); +} + +template +BaseDynInst::BaseDynInst(TheISA::ExtMachInst inst, + Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, + Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu) + : staticInst(inst), traceData(NULL), cpu(cpu) +{ + seqNum = seq_num; + + bool nextIsMicro = + staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + + PC = inst_PC; + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } predPC = pred_PC; predNPC = pred_NPC; + predMicroPC = pred_MicroPC; predTaken = false; initVars(); diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 8d7bb95f4..fb772060b 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ 
-87,9 +87,10 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; - uint64_t mispredPC[Impl::MaxThreads]; - uint64_t nextPC[Impl::MaxThreads]; - uint64_t nextNPC[Impl::MaxThreads]; + Addr mispredPC[Impl::MaxThreads]; + Addr nextPC[Impl::MaxThreads]; + Addr nextNPC[Impl::MaxThreads]; + Addr nextMicroPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; bool includeSquashInst[Impl::MaxThreads]; @@ -118,9 +119,10 @@ struct TimeBufStruct { // struct as it is used pretty frequently. bool branchMispredict; bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - uint64_t nextNPC; + Addr mispredPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; unsigned branchCount; }; @@ -158,9 +160,10 @@ struct TimeBufStruct { bool branchMispredict; bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - uint64_t nextNPC; + Addr mispredPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index fba618c14..27bdd20c5 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -279,25 +279,37 @@ class DefaultCommit /** Returns the PC of the head instruction of the ROB. * @todo: Probably remove this function as it returns only thread 0. */ - uint64_t readPC() { return PC[0]; } + Addr readPC() { return PC[0]; } /** Returns the PC of a specific thread. */ - uint64_t readPC(unsigned tid) { return PC[tid]; } + Addr readPC(unsigned tid) { return PC[tid]; } /** Sets the PC of a specific thread. */ - void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } + void setPC(Addr val, unsigned tid) { PC[tid] = val; } + + /** Reads the micro PC of a specific thread. 
*/ + Addr readMicroPC(unsigned tid) { return microPC[tid]; } + + /** Sets the micro PC of a specific thread */ + void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; } /** Reads the next PC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } + Addr readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } + void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; } /** Reads the next NPC of a specific thread. */ - uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } + Addr readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } + void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; } + + /** Reads the next micro PC of a specific thread. */ + Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; } + + /** Sets the next micro PC of a specific thread */ + void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; } private: /** Time buffer interface. */ @@ -402,12 +414,20 @@ class DefaultCommit */ Addr PC[Impl::MaxThreads]; + /** The commit micro PC of each thread. Refers to the instruction that + * is currently being processed/committed. + */ + Addr microPC[Impl::MaxThreads]; + /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; + /** The next micro PC of each thread. */ + Addr nextMicroPC[Impl::MaxThreads]; + /** The sequence number of the youngest valid instruction in the ROB.
*/ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 9dd5ed291..fc24d7edc 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -124,7 +124,7 @@ DefaultCommit::DefaultCommit(O3CPU *_cpu, Params *params) committedStores[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = nextNPC[i] = 0; + microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0; } #if FULL_SYSTEM interrupt = NoFault; @@ -508,6 +508,7 @@ DefaultCommit::squashAll(unsigned tid) toIEW->commitInfo[tid].nextPC = PC[tid]; toIEW->commitInfo[tid].nextNPC = nextPC[tid]; + toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid]; } template @@ -768,6 +769,7 @@ DefaultCommit::commit() toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid]; toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid]; + toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid]; toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; @@ -877,6 +879,7 @@ DefaultCommit::commitInsts() PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); nextNPC[tid] = head_inst->readNextNPC(); + nextMicroPC[tid] = head_inst->readNextMicroPC(); // Increment the total number of non-speculative instructions // executed. 
@@ -905,12 +908,10 @@ DefaultCommit::commitInsts() } PC[tid] = nextPC[tid]; -#if ISA_HAS_DELAY_SLOT nextPC[tid] = nextNPC[tid]; nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); -#else - nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); -#endif + microPC[tid] = nextMicroPC[tid]; + nextMicroPC[tid] = microPC[tid] + 1; #if FULL_SYSTEM int count = 0; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index b2b4645d2..59978a065 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -696,7 +696,7 @@ FullO3CPU::removeThread(unsigned tid) // Squash Throughout Pipeline InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid); + fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); @@ -1150,6 +1150,20 @@ FullO3CPU::setPC(Addr new_PC,unsigned tid) commit.setPC(new_PC, tid); } +template +uint64_t +FullO3CPU::readMicroPC(unsigned tid) +{ + return commit.readMicroPC(tid); +} + +template +void +FullO3CPU::setMicroPC(Addr new_PC,unsigned tid) +{ + commit.setMicroPC(new_PC, tid); +} + template uint64_t FullO3CPU::readNextPC(unsigned tid) @@ -1178,6 +1192,20 @@ FullO3CPU::setNextNPC(uint64_t val,unsigned tid) commit.setNextNPC(val, tid); } +template +uint64_t +FullO3CPU::readNextMicroPC(unsigned tid) +{ + return commit.readNextMicroPC(tid); +} + +template +void +FullO3CPU::setNextMicroPC(Addr new_PC,unsigned tid) +{ + commit.setNextMicroPC(new_PC, tid); +} + template typename FullO3CPU::ListIt FullO3CPU::addInst(DynInstPtr &inst) diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 4b247e6e3..bff78bf9e 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid); /** Reads the commit PC of a specific thread. 
*/ - uint64_t readPC(unsigned tid); + Addr readPC(unsigned tid); /** Sets the commit PC of a specific thread. */ void setPC(Addr new_PC, unsigned tid); + /** Reads the commit micro PC of a specific thread. */ + Addr readMicroPC(unsigned tid); + + /** Sets the commit micro PC of a specific thread. */ + void setMicroPC(Addr new_microPC, unsigned tid); + /** Reads the next PC of a specific thread. */ - uint64_t readNextPC(unsigned tid); + Addr readNextPC(unsigned tid); /** Sets the next PC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid); + void setNextPC(Addr val, unsigned tid); /** Reads the next NPC of a specific thread. */ - uint64_t readNextNPC(unsigned tid); + Addr readNextNPC(unsigned tid); /** Sets the next NPC of a specific thread. */ - void setNextNPC(uint64_t val, unsigned tid); + void setNextNPC(Addr val, unsigned tid); + + /** Reads the commit next micro PC of a specific thread. */ + Addr readNextMicroPC(unsigned tid); + + /** Sets the commit next micro PC of a specific thread. */ + void setNextMicroPC(Addr val, unsigned tid); /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index c9d0a1885..ce6738456 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -273,6 +273,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) ///explicitly for ISAs with delay slots.
toFetch->decodeInfo[tid].nextNPC = inst->branchTarget() + sizeof(TheISA::MachInst); + toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC(); #if ISA_HAS_DELAY_SLOT toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); @@ -735,7 +736,8 @@ DefaultDecode::decodeInsts(unsigned tid) // a check at the end squash(inst, inst->threadNumber); Addr target = inst->branchTarget(); - inst->setPredTarg(target, target + sizeof(TheISA::MachInst)); + //The micro pc after an instruction level branch should be 0 + inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0); break; } } diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index bb0057e7c..7645a226c 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -227,7 +227,7 @@ class DefaultFetch * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -242,12 +242,14 @@ class DefaultFetch bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid); /** Squashes a specific thread and resets the PC. */ - inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid); + inline void doSquash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, unsigned tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to * remove any instructions between fetch and decode that should be sqaushed. */ void squashFromDecode(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid); /** Checks if a thread is stalled. */ @@ -263,6 +265,7 @@ class DefaultFetch * squash should be the commit stage. 
*/ void squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching @@ -346,16 +349,12 @@ class DefaultFetch /** Per-thread fetch PC. */ Addr PC[Impl::MaxThreads]; + /** Per-thread fetch micro PC. */ + Addr microPC[Impl::MaxThreads]; + /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; - /** Per-thread next Next PC. - * This is not a real register but is used for - * architectures that use a branch-delay slot. - * (such as MIPS or Sparc) - */ - Addr nextNPC[Impl::MaxThreads]; - /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 25498c7f3..d1f38e38b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -312,7 +312,7 @@ DefaultFetch::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); - nextNPC[tid] = cpu->readNextNPC(tid); + microPC[tid] = cpu->readMicroPC(tid); } for (int tid=0; tid < numThreads; tid++) { @@ -439,11 +439,7 @@ DefaultFetch::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); -#if ISA_HAS_DELAY_SLOT - nextNPC[i] = cpu->readNextNPC(i); -#else - nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst); -#endif + microPC[i] = cpu->readMicroPC(i); fetchStatus[i] = Running; } numInst = 0; @@ -493,7 +489,7 @@ DefaultFetch::switchToInactive() template bool DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, - Addr &next_NPC) + Addr &next_NPC, Addr &next_MicroPC) { // Do branch prediction check here. 
// A bit of a misnomer...next_PC is actually the current PC until @@ -501,13 +497,22 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - next_PC = next_NPC; - next_NPC = next_NPC + instSize; - inst->setPredTarg(next_PC, next_NPC); + if (inst->isMicroOp() && !inst->isLastMicroOp()) { + next_MicroPC++; + } else { + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + next_MicroPC = 0; + } + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(false); return false; } + //Assume for now that all control flow is to a different macroop which + //would reset the micro pc to 0. + next_MicroPC = 0; + int tid = inst->threadNumber; Addr pred_PC = next_PC; predict_taken = branchPred.predict(inst, pred_PC, tid); @@ -534,7 +539,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, #endif /* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", tid, next_PC, next_NPC);*/ - inst->setPredTarg(next_PC, next_NPC); + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(predict_taken); ++fetchedBranches; @@ -658,14 +663,14 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid template inline void DefaultFetch::doSquash(const Addr &new_PC, - const Addr &new_NPC, unsigned tid) + const Addr &new_NPC, const Addr &new_microPC, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", tid, new_PC, new_NPC); PC[tid] = new_PC; nextPC[tid] = new_NPC; - nextNPC[tid] = new_NPC + instSize; + microPC[tid] = new_microPC; // Clear the icache miss if it's outstanding. 
if (fetchStatus[tid] == IcacheWaitResponse) { @@ -693,12 +698,12 @@ DefaultFetch::doSquash(const Addr &new_PC, template void DefaultFetch::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - unsigned tid) + const Addr &new_MicroPC, + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are in flight between // fetch and decode. @@ -774,11 +779,12 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid); @@ -893,6 +899,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, + fromCommit->commitInfo[tid].nextMicroPC, fromCommit->commitInfo[tid].doneSeqNum, tid); @@ -948,6 +955,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, + fromDecode->decodeInfo[tid].nextMicroPC, fromDecode->decodeInfo[tid].doneSeqNum, tid); @@ -1002,9 +1010,9 @@ DefaultFetch::fetch(bool &status_change) DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); // The current PC. - Addr &fetch_PC = PC[tid]; - - Addr &fetch_NPC = nextPC[tid]; + Addr fetch_PC = PC[tid]; + Addr fetch_NPC = nextPC[tid]; + Addr fetch_MicroPC = microPC[tid]; // Fault code for memory access. 
Fault fault = NoFault; @@ -1063,6 +1071,7 @@ DefaultFetch::fetch(bool &status_change) Addr next_PC = fetch_PC; Addr next_NPC = fetch_NPC; + Addr next_MicroPC = fetch_MicroPC; InstSeqNum inst_seq; MachInst inst; @@ -1070,6 +1079,9 @@ DefaultFetch::fetch(bool &status_change) // @todo: Fix this hack. unsigned offset = (fetch_PC & cacheBlkMask) & ~3; + StaticInstPtr staticInst = NULL; + StaticInstPtr macroop = NULL; + if (fault == NoFault) { // If the read of the first instruction was successful, then grab the // instructions from the rest of the cache line and put them into the @@ -1104,19 +1116,29 @@ DefaultFetch::fetch(bool &status_change) // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); - // Get the instruction from the array of the cache line. - inst = TheISA::gtoh(*reinterpret_cast - (&cacheData[tid][offset])); + if (!macroop) { + // Get the instruction from the array of the cache line. + inst = TheISA::gtoh(*reinterpret_cast + (&cacheData[tid][offset])); - predecoder.setTC(cpu->thread[tid]->getTC()); - predecoder.moreBytes(fetch_PC, 0, inst); + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(fetch_PC, 0, inst); - ext_inst = predecoder.getExtMachInst(); + ext_inst = predecoder.getExtMachInst(); + staticInst = StaticInstPtr(ext_inst); + if (staticInst->isMacroOp()) + macroop = staticInst; + } + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } // Create a new DynInst from the instruction fetched. 
- DynInstPtr instruction = new DynInst(ext_inst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); instruction->setTid(tid); @@ -1139,7 +1161,7 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC); + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); predicted_branch |= (next_PC != fetch_NPC); // Add instruction to the CPU's list of instructions. @@ -1157,6 +1179,7 @@ DefaultFetch::fetch(bool &status_change) // Move to the next instruction, unless we have a branch. fetch_PC = next_PC; fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; if (instruction->isQuiesce()) { DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", @@ -1167,7 +1190,8 @@ DefaultFetch::fetch(bool &status_change) break; } - offset += instSize; + if (!macroop) + offset += instSize; } if (offset >= cacheBlkSize) { @@ -1191,7 +1215,7 @@ DefaultFetch::fetch(bool &status_change) if (fault == NoFault) { PC[tid] = next_PC; nextPC[tid] = next_NPC; - nextNPC[tid] = next_NPC + instSize; + microPC[tid] = next_MicroPC; DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1210,8 +1234,9 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; + StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. 
- DynInstPtr instruction = new DynInst(ext_inst, + DynInstPtr instruction = new DynInst(staticInst, fetch_PC, fetch_NPC, next_PC, next_NPC, inst_seq, cpu); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 050785818..399c44909 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -454,6 +454,7 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) #endif toCommit->nextPC[tid] = inst->readNextPC(); toCommit->nextNPC[tid] = inst->readNextNPC(); + toCommit->nextMicroPC[tid] = inst->readNextMicroPC(); toCommit->includeSquashInst[tid] = false; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 6e7180b1e..d78de2c87 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -963,6 +963,7 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) // Floating point and Miscellaneous registers need their indexes // adjusted to account for the expanded number of flattened int regs. flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; + DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg); } inst->flattenSrcReg(src_idx, flat_src_reg); @@ -979,9 +980,11 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) // See if the register is ready or not. if (scoreboard->getReg(renamed_reg) == true) { - DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid); + DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg); inst->markSrcRegReady(src_idx); + } else { + DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg); } ++renameRenameLookups; @@ -1008,6 +1011,7 @@ DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) // Floating point and Miscellaneous registers need their indexes // adjusted to account for the expanded number of flattened int regs. 
flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; + DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg); } inst->flattenDestReg(dest_idx, flat_dest_reg); diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh index 72242b161..a7ab6cd79 100644 --- a/src/cpu/o3/sparc/dyn_inst.hh +++ b/src/cpu/o3/sparc/dyn_inst.hh @@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. */ - SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu); + SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + + /** BaseDynInst constructor given a binary instruction. */ + SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. 
*/ SparcDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh index c4d30b6f4..6bfe97717 100644 --- a/src/cpu/o3/sparc/dyn_inst_impl.hh +++ b/src/cpu/o3/sparc/dyn_inst_impl.hh @@ -30,11 +30,24 @@ #include "cpu/o3/sparc/dyn_inst.hh" +template +SparcDynInst::SparcDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + template SparcDynInst::SparcDynInst(TheISA::ExtMachInst inst, - Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } -- cgit v1.2.3 From 308b2f0ce3215eaaed69da937555008f9ed36835 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 15 Apr 2007 21:51:05 +0000 Subject: Add extra constructors to Alpha and MIPS --HG-- extra : convert_revision : 26ea87bfe9e5c27134eb9a15bf9e4629afae6c69 --- src/cpu/o3/alpha/dyn_inst.hh | 9 +++++++-- src/cpu/o3/alpha/dyn_inst_impl.hh | 19 +++++++++++++++++-- src/cpu/o3/mips/dyn_inst.hh | 10 ++++++++-- src/cpu/o3/mips/dyn_inst_impl.hh | 18 +++++++++++++++--- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh index 20759d849..a6fb7b885 100644 --- a/src/cpu/o3/alpha/dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -73,8 +73,13 @@ class AlphaDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. 
*/ - AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, + AlphaDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh index fdce1ade5..6dfe0ccdd 100644 --- a/src/cpu/o3/alpha/dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -31,10 +31,25 @@ #include "cpu/o3/alpha/dyn_inst.hh" template -AlphaDynInst::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, +AlphaDynInst::AlphaDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, Addr Pred_PC, Addr Pred_NPC, + Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + +template +AlphaDynInst::AlphaDynInst(ExtMachInst inst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, + Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh index 366b4bb23..cf78c0941 100755 --- a/src/cpu/o3/mips/dyn_inst.hh +++ b/src/cpu/o3/mips/dyn_inst.hh @@ -69,10 +69,16 @@ class MipsDynInst : public BaseDynInst }; public: + /** BaseDynInst constructor given a binary instruction. */ + MipsDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + /** BaseDynInst constructor given a binary instruction. 
*/ MipsDynInst(ExtMachInst inst, - Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh index c0f9ae771..7e8697b32 100755 --- a/src/cpu/o3/mips/dyn_inst_impl.hh +++ b/src/cpu/o3/mips/dyn_inst_impl.hh @@ -30,12 +30,24 @@ #include "cpu/o3/mips/dyn_inst.hh" +template +MipsDynInst::MipsDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + template MipsDynInst::MipsDynInst(ExtMachInst inst, - Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } -- cgit v1.2.3 From 8248af53b19a633ae6d9aa8cd6b5a12cfa3b1644 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 15 Apr 2007 21:52:38 +0000 Subject: Make an inner loop which pulls microops out of macroops. These aren't checked for control flow because we can pull out microops until we run out of buffer. This prevents microops from being interpreted as branches because the pc doesn't become npc.
--HG-- extra : convert_revision : 9fff7c6c32900692bbc567ecb75701c9c73da259 --- src/cpu/o3/fetch_impl.hh | 127 ++++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index d1f38e38b..3ae7bc402 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1094,11 +1094,9 @@ DefaultFetch::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; - for (; - offset < cacheBlkSize && - numInst < fetchWidth && - !predicted_branch; - ++numInst) { + while (offset < cacheBlkSize && + numInst < fetchWidth && + !predicted_branch) { // If we're branching after this instruction, quite fetching // from the same block then. @@ -1109,10 +1107,6 @@ DefaultFetch::fetch(bool &status_change) fetch_PC, fetch_NPC); } - - // Get a sequence number. - inst_seq = cpu->getAndIncrementInstSeq(); - // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); @@ -1129,80 +1123,87 @@ DefaultFetch::fetch(bool &status_change) if (staticInst->isMacroOp()) macroop = staticInst; } - if (macroop) { - staticInst = macroop->fetchMicroOp(fetch_MicroPC); - if (staticInst->isLastMicroOp()) - macroop = NULL; - } + do { + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } - // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(staticInst, - fetch_PC, fetch_NPC, fetch_MicroPC, - next_PC, next_NPC, next_MicroPC, - inst_seq, cpu); - instruction->setTid(tid); + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); - instruction->setASID(tid); + // Create a new DynInst from the instruction fetched. 
+ DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, + inst_seq, cpu); + instruction->setTid(tid); - instruction->setThreadState(cpu->thread[tid]); + instruction->setASID(tid); - DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " - "[sn:%lli]\n", - tid, instruction->readPC(), inst_seq); + instruction->setThreadState(cpu->thread[tid]); - //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); + DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " + "[sn:%lli]\n", + tid, instruction->readPC(), inst_seq); - DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", - tid, instruction->staticInst->disassemble(fetch_PC)); + //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); - instruction->traceData = - Trace::getInstRecord(curTick, cpu->tcBase(tid), - instruction->staticInst, - instruction->readPC()); + DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", + tid, instruction->staticInst->disassemble(fetch_PC)); - ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); - predicted_branch |= (next_PC != fetch_NPC); + instruction->traceData = + Trace::getInstRecord(curTick, cpu->tcBase(tid), + instruction->staticInst, + instruction->readPC()); - // Add instruction to the CPU's list of instructions. - instruction->setInstListIt(cpu->addInst(instruction)); + ///FIXME This needs to be more robust in dealing with delay slots + predicted_branch |= + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); - // Write the instruction to the first slot in the queue - // that heads to decode. - toDecode->insts[numInst] = instruction; + // Add instruction to the CPU's list of instructions. + instruction->setInstListIt(cpu->addInst(instruction)); - toDecode->size++; + // Write the instruction to the first slot in the queue + // that heads to decode. 
+ toDecode->insts[numInst] = instruction; - // Increment stat of fetched instructions. - ++fetchedInsts; + toDecode->size++; - // Move to the next instruction, unless we have a branch. - fetch_PC = next_PC; - fetch_NPC = next_NPC; - fetch_MicroPC = next_MicroPC; + // Increment stat of fetched instructions. + ++fetchedInsts; - if (instruction->isQuiesce()) { - DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", - curTick); - fetchStatus[tid] = QuiescePending; - ++numInst; - status_change = true; - break; - } + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; + + if (instruction->isQuiesce()) { + DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", + curTick); + fetchStatus[tid] = QuiescePending; + ++numInst; + status_change = true; + break; + } - if (!macroop) - offset += instSize; + ++numInst; + } while (staticInst->isMicroOp() && + !staticInst->isLastMicroOp() && + numInst < fetchWidth); + offset += instSize; } - if (offset >= cacheBlkSize) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " - "block.\n", tid); + if (predicted_branch) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } else if (numInst >= fetchWidth) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " "for this cycle.\n", tid); - } else if (predicted_branch) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " - "instruction encountered.\n", tid); + } else if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); } } -- cgit v1.2.3 From cea543576082ed860e8dae17519ace48e5b2c78a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 22 Apr 2007 17:43:45 +0000 Subject: Make the GSR into a renamed control register. 
It should be split into a renamed part and a control part for the different bitfields, but the renamed part is all that's actually used. --HG-- extra : convert_revision : ffeb4f874bd4430255064f6e8bcb135309932ff8 --- src/arch/sparc/isa/operands.isa | 3 ++- src/arch/sparc/isa_traits.hh | 2 +- src/arch/sparc/sparc_traits.hh | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa index 110b37d15..a627a2e6f 100644 --- a/src/arch/sparc/isa/operands.isa +++ b/src/arch/sparc/isa/operands.isa @@ -149,7 +149,8 @@ def operands {{ 'Fprs': ('ControlReg', 'udw', 'MISCREG_FPRS', None, 43), 'Pcr': ('ControlReg', 'udw', 'MISCREG_PCR', None, 44), 'Pic': ('ControlReg', 'udw', 'MISCREG_PIC', None, 45), - 'Gsr': ('ControlReg', 'udw', 'MISCREG_GSR', None, 46), +# 'Gsr': ('ControlReg', 'udw', 'MISCREG_GSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 46), + 'Gsr': ('IntReg', 'udw', 'NumIntArchRegs + 8', None, 46), 'Softint': ('ControlReg', 'udw', 'MISCREG_SOFTINT', None, 47), 'SoftintSet': ('ControlReg', 'udw', 'MISCREG_SOFTINT_SET', None, 48), 'SoftintClr': ('ControlReg', 'udw', 'MISCREG_SOFTINT_CLR', None, 49), diff --git a/src/arch/sparc/isa_traits.hh b/src/arch/sparc/isa_traits.hh index d0b8827f3..8b3ec36a6 100644 --- a/src/arch/sparc/isa_traits.hh +++ b/src/arch/sparc/isa_traits.hh @@ -58,7 +58,7 @@ namespace SparcISA // These enumerate all the registers for dependence tracking. 
enum DependenceTags { - FP_Base_DepTag = 32*3+8, + FP_Base_DepTag = 32*3+9, Ctrl_Base_DepTag = FP_Base_DepTag + 64 }; diff --git a/src/arch/sparc/sparc_traits.hh b/src/arch/sparc/sparc_traits.hh index d89ec1119..715c08c03 100644 --- a/src/arch/sparc/sparc_traits.hh +++ b/src/arch/sparc/sparc_traits.hh @@ -42,7 +42,7 @@ namespace SparcISA // Number of register windows, can legally be 3 to 32 const int NWindows = 8; //const int NumMicroIntRegs = 1; - const int NumMicroIntRegs = 8; + const int NumMicroIntRegs = 9; // const int NumRegularIntRegs = MaxGL * 8 + NWindows * 16; // const int NumMicroIntRegs = 1; -- cgit v1.2.3 From acc62514b1a4244182a7e5fad8ca03505389d94d Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 22 Apr 2007 17:50:43 +0000 Subject: Make the floating point zero register special handling only apply for ALPHA. --HG-- extra : convert_revision : 4f393a5471656b29cecbacfcb337992239775915 --- src/cpu/o3/free_list.hh | 2 ++ src/cpu/o3/regfile.hh | 4 ++++ src/cpu/o3/rename_map.cc | 4 ++++ src/cpu/o3/scoreboard.cc | 15 +++++++++++++++ 4 files changed, 25 insertions(+) diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index c669b0b34..42fc0c533 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -168,7 +168,9 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg) if (freed_reg != TheISA::ZeroReg) freeIntRegs.push(freed_reg); } else if (freed_reg < numPhysicalRegs) { +#if THE_ISA == ALPHA_ISA if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs)) +#endif freeFloatRegs.push(freed_reg); } } diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index b5b1cd021..75d3fa6eb 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -179,7 +179,9 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); +#if THE_ISA == ALPHA_ISA if (reg_idx != TheISA::ZeroReg) +#endif floatRegFile[reg_idx].d = val; } @@ -194,7 +196,9 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float 
register %i to %#x\n", int(reg_idx), (uint64_t)val); +#if THE_ISA == ALPHA_ISA if (reg_idx != TheISA::ZeroReg) +#endif floatRegFile[reg_idx].d = val; } diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index b436ec1c3..e6649ce3e 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -165,17 +165,21 @@ SimpleRenameMap::rename(RegIndex arch_reg) // If it's not referencing the zero register, then rename the // register. +#if THE_ISA == ALPHA_ISA if (arch_reg != floatZeroReg) { +#endif renamed_reg = freeList->getFloatReg(); floatRenameMap[arch_reg].physical_reg = renamed_reg; assert(renamed_reg < numPhysicalRegs && renamed_reg >= numPhysicalIntRegs); +#if THE_ISA == ALPHA_ISA } else { // Otherwise return the zero register so nothing bad happens. renamed_reg = floatZeroReg; } +#endif } else { // Subtract off the base offset for miscellaneous registers. arch_reg = arch_reg - numLogicalRegs; diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc index 1859b35a4..e7f8b7949 100644 --- a/src/cpu/o3/scoreboard.cc +++ b/src/cpu/o3/scoreboard.cc @@ -29,6 +29,7 @@ * Kevin Lim */ +#include "arch/isa_specific.hh" #include "cpu/o3/scoreboard.hh" Scoreboard::Scoreboard(unsigned activeThreads, @@ -79,11 +80,18 @@ Scoreboard::name() const bool Scoreboard::getReg(PhysRegIndex phys_reg) { +#if THE_ISA == ALPHA_ISA // Always ready if int or fp zero reg. if (phys_reg == zeroRegIdx || phys_reg == (zeroRegIdx + numPhysicalIntRegs)) { return 1; } +#else + // Always ready if int zero reg. + if (phys_reg == zeroRegIdx) { + return 1; + } +#endif return regScoreBoard[phys_reg]; } @@ -99,11 +107,18 @@ Scoreboard::setReg(PhysRegIndex phys_reg) void Scoreboard::unsetReg(PhysRegIndex ready_reg) { +#if THE_ISA == ALPHA_ISA if (ready_reg == zeroRegIdx || ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { // Don't do anything if int or fp zero reg. return; } +#else + if (ready_reg == zeroRegIdx) { + // Don't do anything if int zero reg. 
+ return; + } +#endif regScoreBoard[ready_reg] = 0; } -- cgit v1.2.3 From f0929006965514982603fe58ebc3211acf021cce Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 22 Apr 2007 19:30:47 +0000 Subject: Change mcf to use smred inputs so it doesn't take two days to run in o3. --HG-- extra : convert_revision : 6bf58f5ea12afc56dff6237640fbeded58b9951e --- tests/long/10.mcf/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/long/10.mcf/test.py b/tests/long/10.mcf/test.py index ffe2758f1..cb8acff22 100644 --- a/tests/long/10.mcf/test.py +++ b/tests/long/10.mcf/test.py @@ -29,5 +29,5 @@ m5.AddToPath('../configs/common') from cpu2000 import mcf -workload = mcf(isa, opsys, 'lgred') +workload = mcf(isa, opsys, 'smred') root.system.cpu.workload = workload.makeLiveProcess() -- cgit v1.2.3 -- cgit v1.2.3 From dc1c9e03007f084caabc995b41616603e0a004dc Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 8 May 2007 13:02:19 +0000 Subject: Add a hack to truncate addresses to 32 bits in SE. Paging should be changed to use the architecture's TLB, at which point this can be removed. 
--HG-- extra : convert_revision : 54f3c18e5aead727d0ac244ed00fd97d3ca8ad75 --- src/arch/sparc/isa/formats/mem/basicmem.isa | 6 ++++-- src/arch/sparc/isa/formats/mem/blockmem.isa | 6 ++++-- src/arch/sparc/isa/formats/mem/swap.isa | 8 ++++++-- src/arch/sparc/isa/formats/mem/util.isa | 12 +++++++++++- src/arch/sparc/process.cc | 4 ++-- 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/arch/sparc/isa/formats/mem/basicmem.isa b/src/arch/sparc/isa/formats/mem/basicmem.isa index 2f62c7bef..aa6c4cdea 100644 --- a/src/arch/sparc/isa/formats/mem/basicmem.isa +++ b/src/arch/sparc/isa/formats/mem/basicmem.isa @@ -57,10 +57,12 @@ let {{ addrCalcImm = 'EA = Rs1 + imm;' iop = InstObjParams(name, Name, 'Mem', {"code": code, "postacc_code" : postacc_code, - "fault_check": faultCode, "ea_code": addrCalcReg}, opt_flags) + "fault_check": faultCode, "ea_code": addrCalcReg, + "EA_trunc": TruncateEA}, opt_flags) iop_imm = InstObjParams(name, Name + "Imm", 'MemImm', {"code": code, "postacc_code" : postacc_code, - "fault_check": faultCode, "ea_code": addrCalcImm}, opt_flags) + "fault_check": faultCode, "ea_code": addrCalcImm, + "EA_trunc": TruncateEA}, opt_flags) header_output = MemDeclare.subst(iop) + MemDeclare.subst(iop_imm) decoder_output = BasicConstructor.subst(iop) + BasicConstructor.subst(iop_imm) decode_block = ROrImmDecode.subst(iop) diff --git a/src/arch/sparc/isa/formats/mem/blockmem.isa b/src/arch/sparc/isa/formats/mem/blockmem.isa index e19016bd0..ea74ef179 100644 --- a/src/arch/sparc/isa/formats/mem/blockmem.isa +++ b/src/arch/sparc/isa/formats/mem/blockmem.isa @@ -298,11 +298,13 @@ let {{ iop = InstObjParams(name, Name, 'BlockMem', {"code": pcedCode, "ea_code": addrCalcReg, "fault_check": faultCode, "micro_pc": microPc, - "set_flags": flag_code}, opt_flags) + "set_flags": flag_code, "EA_trunc" : TruncateEA}, + opt_flags) iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm', {"code": pcedCode, "ea_code": addrCalcImm, "fault_check": faultCode, 
"micro_pc": microPc, - "set_flags": flag_code}, opt_flags) + "set_flags": flag_code, "EA_trunc" : TruncateEA}, + opt_flags) decoder_output += BlockMemMicroConstructor.subst(iop) decoder_output += BlockMemMicroConstructor.subst(iop_imm) exec_output += doDualSplitExecute( diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa index b71542a2b..3814d1030 100644 --- a/src/arch/sparc/isa/formats/mem/swap.isa +++ b/src/arch/sparc/isa/formats/mem/swap.isa @@ -51,6 +51,7 @@ def template SwapExecute {{ } if(storeCond && fault == NoFault) { + %(EA_trunc)s fault = xc->write((uint%(mem_acc_size)s_t)Mem, EA, %(asi_val)s, &mem_data); } @@ -91,6 +92,7 @@ def template SwapInitiateAcc {{ } if(fault == NoFault) { + %(EA_trunc)s fault = xc->write((uint%(mem_acc_size)s_t)Mem, EA, %(asi_val)s, &mem_data); } @@ -157,12 +159,14 @@ let {{ addrCalcReg = 'EA = Rs1;' iop = InstObjParams(name, Name, 'Mem', {"code": code, "postacc_code" : postacc_code, - "fault_check": faultCode, "ea_code": addrCalcReg}, opt_flags) + "fault_check": faultCode, "ea_code": addrCalcReg, + "EA_trunc" : TruncateEA}, opt_flags) header_output = MemDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) decode_block = BasicDecode.subst(iop) microParams = {"code": code, "postacc_code" : postacc_code, - "ea_code" : addrCalcReg, "fault_check" : faultCode} + "ea_code" : addrCalcReg, "fault_check" : faultCode, + "EA_trunc" : TruncateEA} exec_output = doSplitExecute(execute, name, Name, asi, ["IsStoreConditional"], microParams); return (header_output, decoder_output, exec_output, decode_block) diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa index dfe937371..38cde9a50 100644 --- a/src/arch/sparc/isa/formats/mem/util.isa +++ b/src/arch/sparc/isa/formats/mem/util.isa @@ -149,6 +149,7 @@ def template LoadExecute {{ %(fault_check)s; if(fault == NoFault) { + %(EA_trunc)s fault = xc->read(EA, (%(mem_acc_type)s%(mem_acc_size)s_t&)Mem, 
%(asi_val)s); } if(fault == NoFault) @@ -179,6 +180,7 @@ def template LoadInitiateAcc {{ %(fault_check)s; if(fault == NoFault) { + %(EA_trunc)s fault = xc->read(EA, (%(mem_acc_type)s%(mem_acc_size)s_t&)Mem, %(asi_val)s); } return fault; @@ -224,6 +226,7 @@ def template StoreExecute {{ } if(storeCond && fault == NoFault) { + %(EA_trunc)s fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem, EA, %(asi_val)s, 0); } @@ -257,6 +260,7 @@ def template StoreInitiateAcc {{ } if(storeCond && fault == NoFault) { + %(EA_trunc)s fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem, EA, %(asi_val)s, 0); } @@ -317,6 +321,11 @@ let {{ fault = new PrivilegedAction; ''' + TruncateEA = ''' +#if !FULL_SYSTEM + EA = Pstate<3:> ? EA<31:0> : EA; +#endif + ''' }}; //A simple function to generate the name of the macro op of a certain @@ -346,7 +355,8 @@ let {{ (eaRegCode, nameReg, NameReg), (eaImmCode, nameImm, NameImm)): microParams = {"code": code, "postacc_code" : postacc_code, - "ea_code": eaCode, "fault_check": faultCode} + "ea_code": eaCode, "fault_check": faultCode, + "EA_trunc" : TruncateEA} executeCode += doSplitExecute(execute, name, Name, asi, opt_flags, microParams) return executeCode diff --git a/src/arch/sparc/process.cc b/src/arch/sparc/process.cc index e4774ab54..11fa9be28 100644 --- a/src/arch/sparc/process.cc +++ b/src/arch/sparc/process.cc @@ -87,8 +87,8 @@ Sparc32LiveProcess::startup() //From the SPARC ABI - //The process runs in user mode - threadContexts[0]->setMiscReg(MISCREG_PSTATE, 0x02); + //The process runs in user mode with 32 bit addresses + threadContexts[0]->setMiscReg(MISCREG_PSTATE, 0x0a); //Setup default FP state threadContexts[0]->setMiscRegNoEffect(MISCREG_FSR, 0); -- cgit v1.2.3 From c2ac0fd89b9928f653e1485b2432cd71b455d7c5 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 8 May 2007 17:19:33 +0000 Subject: Fix insertBits so it doesn't shift things into oblivion --HG-- extra : convert_revision : 8833b60e3fc94c917fbdb7a99f3d90155907b44e --- 
src/base/bitfield.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh index 69cce2245..518bad6b8 100644 --- a/src/base/bitfield.hh +++ b/src/base/bitfield.hh @@ -96,8 +96,9 @@ inline T insertBits(T val, int first, int last, B bit_val) { + T t_bit_val = bit_val; T bmask = mask(first - last + 1) << last; - return ((bit_val << last) & bmask) | (val & ~bmask); + return ((t_bit_val << last) & bmask) | (val & ~bmask); } /** -- cgit v1.2.3 From debf04aef1b0f662e981507545cdac956dd22a47 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 12 May 2007 15:11:44 -0700 Subject: Make sure all addresses used in syscalls are truncated to 32 bits. Actually -all- arguments are truncated to 32 bits, but we should be able to get away with it. --HG-- extra : convert_revision : 3b8766c68a4ab36e2e769fac4812657f3f7e0d1c --- src/cpu/o3/sparc/cpu_impl.hh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh index 50d980f55..2e398577e 100644 --- a/src/cpu/o3/sparc/cpu_impl.hh +++ b/src/cpu/o3/sparc/cpu_impl.hh @@ -272,7 +272,10 @@ SparcO3CPU::getSyscallArg(int i, int tid) { TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid), SparcISA::ArgumentReg0 + i); - return this->readArchIntReg(idx, tid); + TheISA::IntReg val = this->readArchIntReg(idx, tid); + if (bits(this->readMiscRegNoEffect(SparcISA::MISCREG_PSTATE, tid), 3, 3)) + val = bits(val, 31, 0); + return val; } template -- cgit v1.2.3 From df7730b6774a730d554bfaa469ad95eeeffd3dc9 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 20 Jun 2007 19:46:45 -0700 Subject: Fix compiler errors. 
--HG-- extra : convert_revision : 2b10076a24cb36cb748e299011ae691f09c158cd --- src/cpu/base_dyn_inst.hh | 8 ++++---- src/cpu/base_dyn_inst_impl.hh | 6 +++--- src/cpu/o3/fetch_impl.hh | 23 +++++++++++------------ 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 1311e5cf2..a55c1e3c0 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -498,11 +498,11 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isQuiesce() const { return staticInst->isQuiesce(); } bool isIprAccess() const { return staticInst->isIprAccess(); } bool isUnverifiable() const { return staticInst->isUnverifiable(); } - bool isMacroOp() const { return staticInst->isMacroOp(); } - bool isMicroOp() const { return staticInst->isMicroOp(); } + bool isMacroop() const { return staticInst->isMacroop(); } + bool isMicroop() const { return staticInst->isMicroop(); } bool isDelayedCommit() const { return staticInst->isDelayedCommit(); } - bool isLastMicroOp() const { return staticInst->isLastMicroOp(); } - bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); } + bool isLastMicroop() const { return staticInst->isLastMicroop(); } + bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } /** Temporarily sets this instruction as a serialize before instruction. 
*/ diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index acf8af9cf..5c18ae694 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -73,7 +73,7 @@ BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, seqNum = seq_num; bool nextIsMicro = - staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + staticInst->isMicroop() && !staticInst->isLastMicroop(); PC = inst_PC; microPC = inst_MicroPC; @@ -101,12 +101,12 @@ BaseDynInst::BaseDynInst(TheISA::ExtMachInst inst, Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu) - : staticInst(inst), traceData(NULL), cpu(cpu) + : staticInst(inst, inst_PC), traceData(NULL), cpu(cpu) { seqNum = seq_num; bool nextIsMicro = - staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + staticInst->isMicroop() && !staticInst->isLastMicroop(); PC = inst_PC; microPC = inst_MicroPC; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 0fd1e7bac..857a08629 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -498,7 +498,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - if (inst->isMicroOp() && !inst->isLastMicroOp()) { + if (inst->isMicroop() && !inst->isLastMicroop()) { next_MicroPC++; } else { next_PC = next_NPC; @@ -1120,14 +1120,14 @@ DefaultFetch::fetch(bool &status_change) predecoder.moreBytes(fetch_PC, fetch_PC, 0, inst); ext_inst = predecoder.getExtMachInst(); - staticInst = StaticInstPtr(ext_inst); - if (staticInst->isMacroOp()) + staticInst = StaticInstPtr(ext_inst, fetch_PC); + if (staticInst->isMacroop()) macroop = staticInst; } do { if (macroop) { - staticInst = macroop->fetchMicroOp(fetch_MicroPC); - if (staticInst->isLastMicroOp()) + staticInst = macroop->fetchMicroop(fetch_MicroPC); + if (staticInst->isLastMicroop()) macroop = NULL; } @@ -1194,8 +1194,8 @@ DefaultFetch::fetch(bool &status_change) } ++numInst; - } while 
(staticInst->isMicroOp() && - !staticInst->isLastMicroOp() && + } while (staticInst->isMicroop() && + !staticInst->isLastMicroop() && numInst < fetchWidth); offset += instSize; } @@ -1240,13 +1240,12 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; - StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(staticInst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(ext_inst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); - instruction->setPredTarg(next_PC, next_NPC); + instruction->setPredTarg(next_PC, next_NPC, 1); instruction->setTid(tid); instruction->setASID(tid); -- cgit v1.2.3 From afd00820004984de085a727e60e25742a69d9c51 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 20 Jun 2007 19:48:25 -0700 Subject: long is too long --HG-- extra : convert_revision : 7342cd4a1700a247f30d6f85fc6c2685341ba20e --- tests/long/20.parser/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/long/20.parser/test.py b/tests/long/20.parser/test.py index 82ab71c90..8e745ec26 100644 --- a/tests/long/20.parser/test.py +++ b/tests/long/20.parser/test.py @@ -29,5 +29,5 @@ m5.AddToPath('../configs/common') from cpu2000 import parser -workload = parser(isa, opsys, 'lgred') +workload = parser(isa, opsys, 'mdred') root.system.cpu.workload = workload.makeLiveProcess() -- cgit v1.2.3 From ec24de8b59e174b93b7c42669d71fe61db296688 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 21 Jun 2007 20:35:26 +0000 Subject: Get rid of an unnecessary include file. 
--HG-- extra : convert_revision : d8d139180917f54006a5a79df4a0f206ddd39fed --- src/arch/x86/isa/includes.isa | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 3ef204850..4f27c72f5 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -103,7 +103,6 @@ output header {{ #include "base/misc.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" -#include "mem/request.hh" // some constructors use MemReq flags #include "sim/faults.hh" }}; -- cgit v1.2.3 From 70d6044527d6e6dfaf2de6674ae412706b6e131c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 21 Jun 2007 20:35:27 +0000 Subject: Make symbols for regular registers. --HG-- extra : convert_revision : 28a6df1efe4298877dc2b20179caeb25dfdc4622 --- src/arch/x86/isa/insts/control_transfer/call.py | 4 ++-- src/arch/x86/isa/insts/data_transfer/stack_operations.py | 8 ++++---- src/arch/x86/isa/microasm.isa | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/arch/x86/isa/insts/control_transfer/call.py b/src/arch/x86/isa/insts/control_transfer/call.py index 1372f7dba..530162bfd 100644 --- a/src/arch/x86/isa/insts/control_transfer/call.py +++ b/src/arch/x86/isa/insts/control_transfer/call.py @@ -61,8 +61,8 @@ def macroop CALL_I limm t2, imm rdip t1 - subi "INTREG_RSP", "INTREG_RSP", dsz - st t1, ss, [0, t0, "INTREG_RSP"] + subi rsp, rsp, dsz + st t1, ss, [0, t0, rsp] wrip t1, t2 }; ''' diff --git a/src/arch/x86/isa/insts/data_transfer/stack_operations.py b/src/arch/x86/isa/insts/data_transfer/stack_operations.py index ca2443752..585437b8c 100644 --- a/src/arch/x86/isa/insts/data_transfer/stack_operations.py +++ b/src/arch/x86/isa/insts/data_transfer/stack_operations.py @@ -58,16 +58,16 @@ def macroop POP_R { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - ld reg, ss, [0, t0, "INTREG_RSP"] - addi "INTREG_RSP", "INTREG_RSP", dsz + ld reg, ss, [0, t0, rsp] + addi rsp, 
rsp, dsz }; def macroop PUSH_R { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - subi "INTREG_RSP", "INTREG_RSP", dsz - st reg, ss, [0, t0, "INTREG_RSP"] + subi rsp, rsp, dsz + st reg, ss, [0, t0, rsp] }; ''' #let {{ diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 4e06f4391..ee2b92f53 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -91,6 +91,9 @@ let {{ "osz" : "env.operandSize", "ssz" : "env.stackSize" } + + for reg in ('ax', 'bx', 'cx', 'dx', 'sp', 'bp', 'si', 'di'): + assembler.symbols["r%s" % reg] = "INTREG_R%s" % reg.upper() assembler.symbols.update(symbols) # Code literal which forces a default 64 bit operand size in 64 bit mode. -- cgit v1.2.3 From 8e6abaed797d567b4ce009abac63ba19f87efa28 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 22 Jun 2007 15:06:10 -0400 Subject: Update of reference outputs. SPARC_SE o3 gzip didn't have reference outputs, mcf has a reduced input size, and most of the other changes are for a change in how branch mispredicts work which makes things more accurate. 
--HG-- extra : convert_revision : 33ad6a220945b344d2fc5c6abef8e67467e0c0ec --- .../00.gzip/ref/sparc/linux/o3-timing/config.ini | 391 +++ .../00.gzip/ref/sparc/linux/o3-timing/config.out | 366 +++ .../00.gzip/ref/sparc/linux/o3-timing/m5stats.txt | 423 +++ .../long/00.gzip/ref/sparc/linux/o3-timing/stderr | 6 + .../long/00.gzip/ref/sparc/linux/o3-timing/stdout | 44 + .../ref/sparc/linux/simple-atomic/config.ini | 4 +- .../ref/sparc/linux/simple-atomic/config.out | 2 +- .../ref/sparc/linux/simple-atomic/m5stats.txt | 22 +- .../10.mcf/ref/sparc/linux/simple-atomic/mcf.out | 3165 ++++---------------- .../10.mcf/ref/sparc/linux/simple-atomic/stdout | 28 +- .../ref/sparc/linux/simple-timing/config.ini | 13 +- .../ref/sparc/linux/simple-timing/config.out | 8 +- .../ref/sparc/linux/simple-timing/m5stats.txt | 288 +- .../10.mcf/ref/sparc/linux/simple-timing/mcf.out | 3165 ++++---------------- .../10.mcf/ref/sparc/linux/simple-timing/stdout | 28 +- .../00.hello/ref/alpha/linux/o3-timing/config.ini | 13 + .../00.hello/ref/alpha/linux/o3-timing/m5stats.txt | 343 +-- .../00.hello/ref/alpha/linux/o3-timing/stdout | 10 +- .../00.hello/ref/alpha/tru64/o3-timing/config.ini | 13 + .../00.hello/ref/alpha/tru64/o3-timing/m5stats.txt | 161 +- .../00.hello/ref/alpha/tru64/o3-timing/stdout | 10 +- .../ref/alpha/linux/o3-timing/config.ini | 13 + .../ref/alpha/linux/o3-timing/m5stats.txt | 687 ++--- .../ref/alpha/linux/o3-timing/stdout | 10 +- .../ref/sparc/linux/o3-timing/config.ini | 21 +- .../ref/sparc/linux/o3-timing/config.out | 6 +- .../ref/sparc/linux/o3-timing/m5stats.txt | 409 ++- .../02.insttest/ref/sparc/linux/o3-timing/stdout | 6 +- 28 files changed, 3372 insertions(+), 6283 deletions(-) create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt create mode 100644 
tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr create mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini new file mode 100644 index 000000000..585239418 --- /dev/null +++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini @@ -0,0 +1,391 @@ +[root] +type=Root +children=system +dummy=0 + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=DerivO3CPU +children=dcache fuPool icache l2cache toL2Bus workload +BTBEntries=4096 +BTBTagSize=16 +LFSTSize=1024 +LQEntries=32 +RASSize=16 +SQEntries=32 +SSITSize=1024 +activity=0 +backComSize=5 +cachePorts=200 +choiceCtrBits=2 +choicePredictorSize=8192 +clock=500 +commitToDecodeDelay=1 +commitToFetchDelay=1 +commitToIEWDelay=1 +commitToRenameDelay=1 +commitWidth=8 +cpu_id=0 +decodeToFetchDelay=1 +decodeToRenameDelay=1 +decodeWidth=8 +defer_registration=false +dispatchWidth=8 +fetchToDecodeDelay=1 +fetchTrapLatency=1 +fetchWidth=8 +forwardComSize=5 +fuPool=system.cpu.fuPool +function_trace=false +function_trace_start=0 +globalCtrBits=2 +globalHistoryBits=13 +globalPredictorSize=8192 +iewToCommitDelay=1 +iewToDecodeDelay=1 +iewToFetchDelay=1 +iewToRenameDelay=1 +instShiftAmt=2 +issueToExecuteDelay=1 +issueWidth=8 +localCtrBits=2 +localHistoryBits=11 +localHistoryTableSize=2048 +localPredictorSize=2048 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +numIQEntries=64 +numPhysFloatRegs=256 +numPhysIntRegs=256 +numROBEntries=192 +numRobs=1 +numThreads=1 +phase=0 +predType=tournament +progress_interval=0 +renameToDecodeDelay=1 +renameToFetchDelay=1 +renameToIEWDelay=2 +renameToROBDelay=1 +renameWidth=8 +smtCommitPolicy=RoundRobin +smtFetchPolicy=SingleThread +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtNumFetchingThreads=1 
+smtROBPolicy=Partitioned +smtROBThreshold=100 +squashWidth=8 +system=system +trapLatency=13 +wbDepth=1 +wbWidth=8 +workload=system.cpu.workload +dcache_port=system.cpu.dcache.cpu_side +icache_port=system.cpu.icache.cpu_side + +[system.cpu.dcache] +type=BaseCache +adaptive_compression=false +addr_range=0:18446744073709551615 +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +latency=1000 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10000 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=262144 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=20 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.dcache_port +mem_side=system.cpu.toL2Bus.port[1] + +[system.cpu.fuPool] +type=FUPool +children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7 +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu.fuPool.FUList0] +type=FUDesc +children=opList0 +count=6 +opList=system.cpu.fuPool.FUList0.opList0 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +issueLat=1 +opClass=IntAlu +opLat=1 + +[system.cpu.fuPool.FUList1] +type=FUDesc +children=opList0 opList1 +count=2 +opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +issueLat=1 +opClass=IntMult +opLat=3 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +issueLat=19 +opClass=IntDiv +opLat=20 + +[system.cpu.fuPool.FUList2] +type=FUDesc +children=opList0 opList1 opList2 +count=4 
+opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +issueLat=1 +opClass=FloatAdd +opLat=2 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +issueLat=1 +opClass=FloatCmp +opLat=2 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +issueLat=1 +opClass=FloatCvt +opLat=2 + +[system.cpu.fuPool.FUList3] +type=FUDesc +children=opList0 opList1 opList2 +count=2 +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +issueLat=1 +opClass=FloatMult +opLat=4 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +issueLat=12 +opClass=FloatDiv +opLat=12 + +[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +issueLat=24 +opClass=FloatSqrt +opLat=24 + +[system.cpu.fuPool.FUList4] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu.fuPool.FUList4.opList0 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu.fuPool.FUList5.opList0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +children=opList0 opList1 +count=4 +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu.fuPool.FUList7] +type=FUDesc +children=opList0 +count=1 +opList=system.cpu.fuPool.FUList7.opList0 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +issueLat=3 +opClass=IprAccess +opLat=3 + +[system.cpu.icache] +type=BaseCache +adaptive_compression=false +addr_range=0:18446744073709551615 +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 
+latency=1000 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10000 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=131072 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=20 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.icache_port +mem_side=system.cpu.toL2Bus.port[0] + +[system.cpu.l2cache] +type=BaseCache +adaptive_compression=false +addr_range=0:18446744073709551615 +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +latency=1000 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10000 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=2097152 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.toL2Bus.port[2] +mem_side=system.membus.port[1] + +[system.cpu.toL2Bus] +type=Bus +block_size=64 +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side + +[system.cpu.workload] +type=LiveProcess +cmd=gzip input.log 1 +cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing +egid=100 +env= +euid=100 +executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +block_size=64 +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.physmem.port[0] 
system.cpu.l2cache.mem_side + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +zero=false +port=system.membus.port[0] + diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out new file mode 100644 index 000000000..b8a2728b3 --- /dev/null +++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out @@ -0,0 +1,366 @@ +[root] +type=Root +dummy=0 + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 +zero=false + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false +block_size=64 + +[system.cpu.workload] +type=LiveProcess +cmd=gzip input.log 1 +executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip +input=cin +output=cout +env= +cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +opClass=IntAlu +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList0] +type=FUDesc +opList=system.cpu.fuPool.FUList0.opList0 +count=6 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +opClass=IntMult +opLat=3 +issueLat=1 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +opClass=IntDiv +opLat=20 +issueLat=19 + +[system.cpu.fuPool.FUList1] +type=FUDesc +opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 +count=2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +opClass=FloatAdd +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +opClass=FloatCmp +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +opClass=FloatCvt +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2] +type=FUDesc +opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 +count=4 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +opClass=FloatMult +opLat=4 
+issueLat=1 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +opClass=FloatDiv +opLat=12 +issueLat=12 + +[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +opClass=FloatSqrt +opLat=24 +issueLat=24 + +[system.cpu.fuPool.FUList3] +type=FUDesc +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 +count=2 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList4] +type=FUDesc +opList=system.cpu.fuPool.FUList4.opList0 +count=0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +opList=system.cpu.fuPool.FUList5.opList0 +count=0 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 +count=4 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +opClass=IprAccess +opLat=3 +issueLat=3 + +[system.cpu.fuPool.FUList7] +type=FUDesc +opList=system.cpu.fuPool.FUList7.opList0 +count=1 + +[system.cpu.fuPool] +type=FUPool +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu] +type=DerivO3CPU +clock=500 +phase=0 +numThreads=1 +cpu_id=0 +activity=0 +workload=system.cpu.workload +checker=null +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +cachePorts=200 +decodeToFetchDelay=1 +renameToFetchDelay=1 +iewToFetchDelay=1 +commitToFetchDelay=1 +fetchWidth=8 +renameToDecodeDelay=1 +iewToDecodeDelay=1 +commitToDecodeDelay=1 +fetchToDecodeDelay=1 +decodeWidth=8 +iewToRenameDelay=1 +commitToRenameDelay=1 
+decodeToRenameDelay=1 +renameWidth=8 +commitToIEWDelay=1 +renameToIEWDelay=2 +issueToExecuteDelay=1 +dispatchWidth=8 +issueWidth=8 +wbWidth=8 +wbDepth=1 +fuPool=system.cpu.fuPool +iewToCommitDelay=1 +renameToROBDelay=1 +commitWidth=8 +squashWidth=8 +trapLatency=13 +backComSize=5 +forwardComSize=5 +predType=tournament +localPredictorSize=2048 +localCtrBits=2 +localHistoryTableSize=2048 +localHistoryBits=11 +globalPredictorSize=8192 +globalCtrBits=2 +globalHistoryBits=13 +choicePredictorSize=8192 +choiceCtrBits=2 +BTBEntries=4096 +BTBTagSize=16 +RASSize=16 +LQEntries=32 +SQEntries=32 +LFSTSize=1024 +SSITSize=1024 +numPhysIntRegs=256 +numPhysFloatRegs=256 +numIQEntries=64 +numROBEntries=192 +smtNumFetchingThreads=1 +smtFetchPolicy=SingleThread +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtROBPolicy=Partitioned +smtROBThreshold=100 +smtCommitPolicy=RoundRobin +instShiftAmt=2 +defer_registration=false +function_trace=false +function_trace_start=0 + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1000 +mshrs=10 +tgts_per_mshr=20 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10000 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1000 +mshrs=10 +tgts_per_mshr=20 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false 
+adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10000 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1000 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10000 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false +block_size=64 + diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt b/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt new file mode 100644 index 000000000..929354b82 --- /dev/null +++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt @@ -0,0 +1,423 @@ + +---------- Begin Simulation Statistics ---------- +global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. +global.BPredUnit.BTBHits 155497873 # Number of BTB hits +global.BPredUnit.BTBLookups 176569029 # Number of BTB lookups +global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions. 
+global.BPredUnit.condIncorrect 90327270 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 223339092 # Number of conditional branches predicted +global.BPredUnit.lookups 223339092 # Number of BP lookups +global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target. +host_inst_rate 54106 # Simulator instruction rate (inst/s) +host_mem_usage 156124 # Number of bytes of host memory used +host_seconds 27529.37 # Real time elapsed on the host +host_tick_rate 45674334 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 464625781 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 155659586 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 751805606 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 305482201 # Number of stores inserted to the mem dependence unit. +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 1489514762 # Number of instructions simulated +sim_seconds 1.257386 # Number of seconds simulated +sim_ticks 1257385552000 # Number of ticks simulated +system.cpu.commit.COM:branches 86246390 # Number of branches committed +system.cpu.commit.COM:bw_lim_events 9313657 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits +system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle.samples 2273477268 +system.cpu.commit.COM:committed_per_cycle.min_value 0 + 0 1413600532 6217.79% + 1 557883273 2453.88% + 2 123364539 542.62% + 3 120963543 532.06% + 4 18884040 83.06% + 5 12171132 53.54% + 6 9965158 43.83% + 7 7331394 32.25% + 8 9313657 40.97% +system.cpu.commit.COM:committed_per_cycle.max_value 8 +system.cpu.commit.COM:committed_per_cycle.end_dist + +system.cpu.commit.COM:count 1489514762 # Number of instructions committed +system.cpu.commit.COM:loads 402511689 # Number of loads 
committed +system.cpu.commit.COM:membars 51356 # Number of memory barriers committed +system.cpu.commit.COM:refs 569359657 # Number of memory references committed +system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed +system.cpu.commit.branchMispredicts 90327270 # The number of times a branch was mispredicted +system.cpu.commit.commitCommittedInsts 1489514762 # The number of committed instructions +system.cpu.commit.commitNonSpecStalls 2243499 # The number of times commit has been forced to stall to communicate backwards +system.cpu.commit.commitSquashedInsts 1399513618 # The number of squashed insts skipped by commit +system.cpu.committedInsts 1489514762 # Number of Instructions Simulated +system.cpu.committedInsts_total 1489514762 # Number of Instructions Simulated +system.cpu.cpi 1.688316 # CPI: Cycles Per Instruction +system.cpu.cpi_total 1.688316 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 431095835 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 2842.252413 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2392.500580 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 430168385 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 2636047000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.002151 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 927450 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 694672 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 556921500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.000540 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 232778 # number of ReadReq MSHR misses +system.cpu.dcache.SwapReq_accesses 1326 # number of SwapReq accesses(hits+misses) +system.cpu.dcache.SwapReq_avg_miss_latency 3500 # average SwapReq miss latency 
+system.cpu.dcache.SwapReq_avg_mshr_miss_latency 2500 # average SwapReq mshr miss latency +system.cpu.dcache.SwapReq_hits 1319 # number of SwapReq hits +system.cpu.dcache.SwapReq_miss_latency 24500 # number of SwapReq miss cycles +system.cpu.dcache.SwapReq_miss_rate 0.005279 # miss rate for SwapReq accesses +system.cpu.dcache.SwapReq_misses 7 # number of SwapReq misses +system.cpu.dcache.SwapReq_mshr_miss_latency 17500 # number of SwapReq MSHR miss cycles +system.cpu.dcache.SwapReq_mshr_miss_rate 0.005279 # mshr miss rate for SwapReq accesses +system.cpu.dcache.SwapReq_mshr_misses 7 # number of SwapReq MSHR misses +system.cpu.dcache.WriteReq_accesses 166846642 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 3889.592412 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 3171.120393 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 165155866 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 6576429500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.010134 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 1690776 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 1420478 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 857147500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.001620 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 270298 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 1183.354576 # Average number of references to valid blocks. 
+system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 597942477 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 3518.594842 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 2810.845677 # average overall mshr miss latency +system.cpu.dcache.demand_hits 595324251 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 9212476500 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.004379 # miss rate for demand accesses +system.cpu.dcache.demand_misses 2618226 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 2115150 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 1414069000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.000841 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 503076 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 597942477 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 3518.594842 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 2810.845677 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 595324251 # 
number of overall hits +system.cpu.dcache.overall_miss_latency 9212476500 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.004379 # miss rate for overall accesses +system.cpu.dcache.overall_misses 2618226 # number of overall misses +system.cpu.dcache.overall_mshr_hits 2115150 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 1414069000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.000841 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 503076 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 498987 # number of replacements +system.cpu.dcache.sampled_refs 503083 # Sample count of references to valid blocks. 
+system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 4095.797134 # Cycle average of tags in use +system.cpu.dcache.total_refs 595325570 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 77974000 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 335737 # number of writebacks +system.cpu.decode.DECODE:BlockedCycles 435745843 # Number of cycles decode is blocked +system.cpu.decode.DECODE:DecodedInsts 3276032607 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 1073744654 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 761619600 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 241293837 # Number of cycles decode is squashing +system.cpu.decode.DECODE:UnblockCycles 2367171 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 223339092 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 355860305 # Number of cache lines fetched +system.cpu.fetch.Cycles 1166695920 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 14770227 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 3591774268 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 93734364 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.088811 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 355860305 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 155497873 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 1.428271 # Number of inst fetches per cycle +system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist.samples 2514771105 +system.cpu.fetch.rateDist.min_value 0 + 0 1703935491 6775.71% + 1 252157679 
1002.71% + 2 75632424 300.75% + 3 38096592 151.49% + 4 76680653 304.92% + 5 30840750 122.64% + 6 33076966 131.53% + 7 20130593 80.05% + 8 284219957 1130.20% +system.cpu.fetch.rateDist.max_value 8 +system.cpu.fetch.rateDist.end_dist + +system.cpu.icache.ReadReq_accesses 355860305 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5111.111111 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4198.640483 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 355858946 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 6946000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.000004 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 1359 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 35 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 5559000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.000004 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 1324 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.icache.avg_refs 268775.638973 # Average number of references to valid blocks. 
+system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 355860305 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5111.111111 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4198.640483 # average overall mshr miss latency +system.cpu.icache.demand_hits 355858946 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 6946000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.000004 # miss rate for demand accesses +system.cpu.icache.demand_misses 1359 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 35 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 5559000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.000004 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 1324 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 355860305 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5111.111111 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4198.640483 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 355858946 # number of overall 
hits +system.cpu.icache.overall_miss_latency 6946000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.000004 # miss rate for overall accesses +system.cpu.icache.overall_misses 1359 # number of overall misses +system.cpu.icache.overall_mshr_hits 35 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 5559000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.000004 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 1324 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 198 # number of replacements +system.cpu.icache.sampled_refs 1324 # Sample count of references to valid blocks. 
+system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 1026.431065 # Cycle average of tags in use +system.cpu.icache.total_refs 355858946 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.idleCycles 1497 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 128998684 # Number of branches executed +system.cpu.iew.EXEC:nop 0 # number of nop insts executed +system.cpu.iew.EXEC:rate 0.879999 # Inst execution rate +system.cpu.iew.EXEC:refs 756340485 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 208683785 # Number of stores executed +system.cpu.iew.EXEC:swp 0 # number of swp insts executed +system.cpu.iew.WB:consumers 1511846593 # num instructions consuming a value +system.cpu.iew.WB:count 2184193190 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.964010 # average fanout of values written-back +system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ +system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu.iew.WB:producers 1457435157 # num instructions producing a value +system.cpu.iew.WB:rate 0.868546 # insts written-back per cycle +system.cpu.iew.WB:sent 2194556483 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 93921260 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 242324 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 751805606 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 21112863 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 6967923 # Number of squashed instructions skipped by dispatch 
+system.cpu.iew.iewDispStoreInsts 305482201 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 2889028359 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 547656700 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 155922171 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 2212995141 # Number of executed instructions +system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall +system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle +system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall +system.cpu.iew.iewSquashCycles 241293837 # Number of cycles IEW is squashing +system.cpu.iew.iewUnblockCycles 1173 # Number of cycles IEW is unblocking +system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.0.forwLoads 116560202 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 586068 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address +system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu.iew.lsq.thread.0.memOrderViolation 3827981 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.rescheduledLoads 59 # Number of loads that were rescheduled +system.cpu.iew.lsq.thread.0.squashedLoads 349293917 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 138634233 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 3827981 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 1127857 # Number of branches 
that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 92793403 # Number of branches that were predicted taken incorrectly +system.cpu.ipc 0.592306 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.592306 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 2368917312 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.start_dist + No_OpClass 351375247 14.83% # Type of FU issued + IntAlu 1188705257 50.18% # Type of FU issued + IntMult 0 0.00% # Type of FU issued + IntDiv 0 0.00% # Type of FU issued + FloatAdd 2951238 0.12% # Type of FU issued + FloatCmp 0 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 0 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 592531661 25.01% # Type of FU issued + MemWrite 233353909 9.85% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.end_dist +system.cpu.iq.ISSUE:fu_busy_cnt 6622922 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.002796 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_full.start_dist + No_OpClass 0 0.00% # attempts to use FU when none available + IntAlu 3150287 47.57% # attempts to use FU when none available + IntMult 0 0.00% # attempts to use FU when none available + IntDiv 0 0.00% # attempts to use FU when none available + FloatAdd 202242 3.05% # attempts to use FU when none available + FloatCmp 0 0.00% # attempts to use FU when none available + FloatCvt 0 0.00% # attempts to use FU when none available + FloatMult 0 0.00% # attempts to use FU when none available + FloatDiv 0 0.00% # attempts to use FU when none available + FloatSqrt 0 0.00% # attempts to use FU when none available + MemRead 2975364 44.93% # attempts to use FU when none available + MemWrite 295029 4.45% # attempts to use FU when none available + IprAccess 0 0.00% # attempts to use FU when none available + 
InstPrefetch 0 0.00% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full.end_dist +system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle.samples 2514771105 +system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 + 0 1264571415 5028.57% + 1 618163663 2458.13% + 2 318214573 1265.38% + 3 195947630 779.19% + 4 78232851 311.09% + 5 28085074 111.68% + 6 8167595 32.48% + 7 2987163 11.88% + 8 401141 1.60% +system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 +system.cpu.iq.ISSUE:issued_per_cycle.end_dist + +system.cpu.iq.ISSUE:rate 0.942001 # Inst issue rate +system.cpu.iq.iqInstsAdded 2867645475 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 2368917312 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 21382884 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 1368214032 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 461256 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 19139385 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 1296493196 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadReq_accesses 504406 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 4393.799833 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2267.430007 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 476939 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 120684500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.054454 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 27467 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 62279500 # number 
of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.054454 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 27467 # number of ReadReq MSHR misses +system.cpu.l2cache.Writeback_accesses 335737 # number of Writeback accesses(hits+misses) +system.cpu.l2cache.Writeback_hits 335720 # number of Writeback hits +system.cpu.l2cache.Writeback_miss_rate 0.000051 # miss rate for Writeback accesses +system.cpu.l2cache.Writeback_misses 17 # number of Writeback misses +system.cpu.l2cache.Writeback_mshr_miss_rate 0.000051 # mshr miss rate for Writeback accesses +system.cpu.l2cache.Writeback_mshr_misses 17 # number of Writeback MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 29.586740 # Average number of references to valid blocks. +system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 504406 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 4393.799833 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2267.430007 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 476939 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 120684500 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.054454 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 27467 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # 
number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 62279500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.054454 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 27467 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 840143 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 4391.082084 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2267.430007 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 812659 # number of overall hits +system.cpu.l2cache.overall_miss_latency 120684500 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.032713 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 27484 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 62279500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.032693 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 27467 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr 
+system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 2692 # number of replacements +system.cpu.l2cache.sampled_refs 27467 # Sample count of references to valid blocks. +system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.l2cache.tagsinuse 24466.224839 # Cycle average of tags in use +system.cpu.l2cache.total_refs 812659 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
+system.cpu.l2cache.writebacks 2555 # number of writebacks +system.cpu.numCycles 2514771105 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 14153952 # Number of cycles rename is blocking +system.cpu.rename.RENAME:CommittedMaps 1244762263 # Number of HB maps that are committed +system.cpu.rename.RENAME:IQFullEvents 845 # Number of times rename has blocked due to IQ full +system.cpu.rename.RENAME:IdleCycles 1122858502 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 18964355 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:RenameLookups 4974059876 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 3105364972 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 2435580679 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 713636177 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 241293837 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 24303898 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 1190818416 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 398524739 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 21495577 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 149561373 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 21338548 # count of temporary serializing insts renamed +system.cpu.timesIdled 3 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.workload.PROG:num_syscalls 19 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr new file mode 
100644 index 000000000..6fe2fe04f --- /dev/null +++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr @@ -0,0 +1,6 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0xb4000 length 0x10. +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +warn: Entering event queue @ 0. Starting simulation... +warn: Ignoring request to flush register windows. diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout new file mode 100644 index 000000000..c0d965c7b --- /dev/null +++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout @@ -0,0 +1,44 @@ +spec_init +Loading Input Data +Duplicating 262144 bytes +Duplicating 524288 bytes +Input data 1048576 bytes in length +Compressing Input Data, level 1 +Compressed data 108074 bytes in length +Uncompressing Data +Uncompressed data 1048576 bytes in length +Uncompressed data compared correctly +Compressing Input Data, level 3 +Compressed data 97831 bytes in length +Uncompressing Data +Uncompressed data 1048576 bytes in length +Uncompressed data compared correctly +Compressing Input Data, level 5 +Compressed data 83382 bytes in length +Uncompressing Data +Uncompressed data 1048576 bytes in length +Uncompressed data compared correctly +Compressing Input Data, level 7 +Compressed data 76606 bytes in length +Uncompressing Data +Uncompressed data 1048576 bytes in length +Uncompressed data compared correctly +Compressing Input Data, level 9 +Compressed data 73189 bytes in length +Uncompressing Data +Uncompressed data 1048576 bytes in length +Uncompressed data compared correctly +Tested 1MB buffer: OK! 
+M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Jun 21 2007 21:15:48 +M5 started Fri Jun 22 01:01:27 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing tests/run.py long/00.gzip/sparc/linux/o3-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1257385552000 because target called exit() diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini index 9b8d69888..9cdc13914 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini @@ -39,7 +39,7 @@ env= euid=100 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf gid=100 -input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in +input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in output=cout pid=100 ppid=99 @@ -53,7 +53,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port +port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port [system.physmem] type=PhysicalMemory diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out index 8a5c9fd62..b84a9d780 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out @@ -26,7 +26,7 @@ block_size=64 type=LiveProcess cmd=mcf mcf.in executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf -input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in +input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in output=cout env= cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt 
b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt index 530572b5d..ed8482fb4 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt @@ -1,18 +1,18 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 686638 # Simulator instruction rate (inst/s) -host_mem_usage 149820 # Number of bytes of host memory used -host_seconds 2504.37 # Real time elapsed on the host -host_tick_rate 343319148 # Simulator tick rate (ticks/s) +host_inst_rate 1151751 # Simulator instruction rate (inst/s) +host_mem_usage 150484 # Number of bytes of host memory used +host_seconds 211.71 # Real time elapsed on the host +host_tick_rate 575874246 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 1719594534 # Number of instructions simulated -sim_seconds 0.859797 # Number of seconds simulated -sim_ticks 859797266500 # Number of ticks simulated +sim_insts 243840172 # Number of instructions simulated +sim_seconds 0.121920 # Number of seconds simulated +sim_ticks 121920085500 # Number of ticks simulated system.cpu.idle_fraction 0 # Percentage of idle cycles system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 1719594534 # number of cpu cycles simulated -system.cpu.num_insts 1719594534 # Number of instructions executed -system.cpu.num_refs 774793634 # Number of memory references -system.cpu.workload.PROG:num_syscalls 632 # Number of system calls +system.cpu.numCycles 243840172 # number of cpu cycles simulated +system.cpu.num_insts 243840172 # Number of instructions executed +system.cpu.num_refs 105125191 # Number of memory references +system.cpu.workload.PROG:num_syscalls 428 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out index 6bbb02cf0..095132477 100644 --- 
a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out @@ -1,3092 +1,999 @@ () -1642 -*** -1759 -() -1641 -*** -1691 +500 () -1640 +499 () -1639 +498 () -1638 +496 () -1637 +495 () -1636 +494 () -1635 +493 () -1634 +492 () -1633 +491 () -1632 +490 () -1631 +489 () -1630 +488 () -1629 +487 () -1628 +486 () -1627 +484 () -1626 +482 () -1625 -*** -1784 +481 () -1624 +480 () -1623 +479 () -1622 -*** -1688 +478 () -1621 +477 () -1618 +476 () -1617 -*** -1796 +475 () -1616 +474 () -1615 -*** -1668 +473 () -1614 +472 () -1613 +471 () -1612 -*** -1700 +469 () -1611 +468 () -1610 +467 () -1608 +466 () -1606 +465 () -1605 +464 () -1604 +463 () -1603 +462 () -1602 +461 () -1601 +460 () -1599 +459 () -1598 -*** -1714 +458 () -1597 +457 () -1595 +455 () -1591 +454 () -1590 -*** -1773 +452 () -1589 +451 () -1588 +450 () -1587 -*** -1710 +449 () -1586 +448 () -1585 +446 () -1584 -*** -1748 +445 () -1583 -*** -1648 +444 () -1582 +443 () -1581 -*** -1757 +442 () -1579 +440 () -1578 -*** -1726 +439 () -1575 -*** -1763 +438 () -1574 +436 () -1573 +435 () -1572 +433 () -1571 +432 () -1568 +431 () -1567 +428 () -1565 -*** -1643 +427 () -1564 +425 () -1563 +424 () -1562 +423 () -1559 +420 () -1557 +419 () -1556 +416 () -1555 +414 () -1554 +413 () -1553 -*** -1684 +412 () -1552 +407 () -1551 -*** -1697 +406 () -1549 +405 () -1546 -*** -1768 +404 () -1544 -*** -1798 +403 () -1542 +402 () -1541 -*** -1650 +401 () -1540 +400 () -1539 +399 () -1538 +398 () -1536 +396 () -1534 +395 () -1533 +393 () -1532 +392 () -1529 +390 () -1528 +389 () -1527 +388 () -1526 +387 () -1525 +386 () -1524 -*** -1736 +385 () -1523 +384 () -1522 -*** -1794 +383 () -1521 +382 () -1519 +381 () -1517 -*** -1687 +380 () -1516 +379 () -1515 +377 () -1514 +375 () -1513 +374 () -1512 +373 () -1511 +372 () -1510 +371 () -1509 +370 () -1508 +369 () -1507 +368 () -1506 +366 () -1505 +365 () -1504 +364 () -1503 +362 () -1502 -*** -1746 +361 () -1501 -*** -1766 
+360 () -1498 +359 () -1497 +358 () -1495 +357 () -1494 +356 () -1493 -*** -1673 +355 () -1490 -*** -1774 +354 () -1486 +352 () -1485 +350 () -1482 +347 () -1481 +344 () -1480 +342 () -1479 +341 () -1477 +340 () -1476 +339 () -1475 +338 () -1473 +332 () -1472 +325 () -1471 +320 *** -1728 -() -1470 -() -1469 -() -1467 -() -1466 -() -1465 -() -1464 -() -1463 -() -1462 -() -1461 -() -1460 -() -1459 -() -1455 +345 () -1454 +319 *** -1782 -() -1453 -() -1452 -() -1451 +497 () -1449 +318 *** -1732 -() -1448 -() -1445 -() -1444 -() -1442 -() -1441 -() -1440 -() -1438 -() -1437 -() -1435 -() -1433 +349 () -1432 +317 *** -1665 -() -1431 -() -1426 -() -1425 -() -1424 -() -1423 +408 () -1420 +316 *** -1499 +324 () -1419 -*** -1457 +315 *** -1653 +328 () -1418 -*** -1577 +314 *** -1664 +335 () -1417 +313 *** -1489 +378 () -1416 +312 *** -1545 +426 () -1415 +311 *** -1430 +411 () -1414 +304 *** -1434 +343 () -1413 -*** -1594 +303 *** -1735 +417 () -1412 -*** -1560 +302 *** -1724 +485 () -1411 +301 *** -1428 +363 () -1404 -*** -1496 +300 *** -1780 +376 () -1403 +299 *** -1561 +333 () -1402 +292 *** -1548 +337 () -1401 -*** -1569 +291 *** -1792 +409 () -1400 +290 *** -1537 +421 () -1399 +289 *** -1429 +437 () -1392 +288 *** -1580 +430 () -1391 +287 *** -1410 +348 () -1390 +286 *** -1500 +326 () -1389 -*** -1483 +284 () -1388 +282 *** -1570 +308 () -1387 +279 *** -1543 -() -1386 +297 *** -1558 -() -1385 +305 () -1384 +278 () -1382 +277 *** -1439 +307 () -1381 +276 *** -1677 +296 () -1380 +273 () -1378 -*** -1397 +271 () -1377 -*** -1787 +265 () -1376 +246 *** -1408 -() -1375 -() -1374 +267 () -1373 +245 *** -1671 -() -1372 +280 () -1370 +244 *** -1793 +391 () -1369 +243 +*** +330 () -1365 +242 *** -1762 +456 () -1346 +241 +*** +346 () -1345 +240 *** -1566 +483 () -1344 +239 *** -1520 +260 () -1343 +238 *** -1492 +261 () -1342 +237 *** -1576 +262 *** -1656 +294 () -1341 +236 *** -1447 +253 () -1340 +229 *** -1550 -() -1339 -() -1338 -() -1337 +397 () -1329 +228 *** -1336 +298 () 
-1328 +227 *** -1446 +415 () -1327 +226 *** -1607 -() -1325 -() -1324 -() -1323 -() -1317 -() -1315 +264 () -1311 +224 *** -1450 +232 +() +222 *** -1720 +233 () -1310 +217 *** -1619 +250 () -1309 +211 *** -1458 +331 () -1308 +210 +*** +394 () -1307 +209 *** -1427 +410 () -1306 +208 *** -1364 +321 +() +207 *** -1696 +327 () -1299 +206 +*** +309 () -1297 +199 *** -1395 +259 () -1296 +198 +*** +219 () -1295 +197 *** -1326 +220 () -1294 +195 *** -1371 +429 () -1293 +194 *** -1456 +470 () -1292 +193 *** -1312 +274 () -1291 +191 +*** +203 () -1290 +190 *** -1363 +263 () -1282 +189 +215 *** -1592 +230 () -1281 +188 +*** +266 *** -1379 +295 () -1280 +182 *** -1478 +329 () -1279 +181 *** -1436 +351 () -1278 +180 *** -1620 +441 () -1277 +179 *** -1487 +453 () -1276 +178 *** -1288 +418 () -1275 +177 *** -1596 +353 () -1274 +176 *** -1322 +422 () -1273 +175 *** -1305 +225 *** -1699 +255 () -1272 +174 +*** +269 () -1271 +173 *** -1484 +214 () -1270 +172 *** -1518 +186 () -1269 +171 *** -1289 +447 () -1268 +170 *** -1443 +270 *** -1786 +306 () -1265 +169 +*** +336 () -1243 +168 *** -1368 +285 () -1242 +165 +*** +249 () -1241 +146 *** -1421 +154 +() +143 *** -1749 +334 () -1240 +142 *** -1260 +216 *** -1678 -() -1239 -() -1238 +257 () -1236 +141 *** -1263 +167 *** -1767 +251 () -1235 +140 +*** +162 +*** +293 () -1234 +139 +*** +158 () -1233 +137 +*** +166 +*** +201 () -1232 +136 *** -1752 +160 () -1231 +134 *** -1791 +221 () -1230 +132 +*** +213 () -1229 +131 +*** +187 () -1228 +129 *** -1702 +235 () -1227 +128 +*** +153 () -1226 +127 +*** +156 () -1225 +126 +*** +159 +*** +218 () -1224 +125 +*** +155 () -1223 +124 +*** +157 () -1216 +123 *** -1531 +152 () -1215 +116 *** -1530 +135 *** -1797 +163 () -1214 +115 *** -1474 +133 *** -1742 -() -1213 +204 *** -1488 +248 () -1212 +114 *** -1298 +192 *** -1789 +212 () -1211 +113 *** -1491 +268 () -1210 +112 *** -1600 +367 () -1209 +111 *** -1244 +272 () -1208 -*** -1609 +110 *** -1704 +434 () -1207 +109 *** -1237 +323 () -1206 +108 *** 
-1468 +281 () -1205 +107 +*** +144 *** -1547 +148 () -1204 +106 *** -1246 +275 () -1203 +105 *** -1593 +196 *** -1734 +254 () -1202 +104 *** -1535 -() -1200 -() -1198 -() -1196 -() -1195 +138 +*** +161 () -1194 +103 *** -1302 +310 () -1192 +102 +*** +223 +*** +252 () -1191 +80 () -1189 +70 () -1188 +69 () -1187 +68 () -1186 +66 () -1183 +64 () -1181 +62 *** -1778 -() -1179 +256 () -1178 +61 +*** +93 () -1177 +59 *** -1645 +120 () -1176 +58 () -1175 -*** -1318 +57 *** -1649 +183 () -1173 +55 () -1172 +54 () -1171 -() -1169 +52 *** -1654 +147 () -1168 +51 *** -1692 -() -1167 -() -1164 -() -1163 +118 () -1162 +50 *** -1716 -() -1160 +83 () -1159 +49 *** -1663 -() -1157 -() -1156 -() -1155 -() -1154 -() -1153 -() -1152 -() -1150 -() -1149 -() -1147 -() -1145 +98 () -1143 +48 *** -1711 -() -1142 +99 () -1141 +47 () -1140 +46 +*** +184 () -1139 +45 *** -1755 +121 () -1138 +44 () -1137 +43 *** -1218 +88 () -1136 -*** -1248 +42 *** -1670 -() -1135 -() -1134 +122 () -1133 +41 *** -1662 -() -1132 -() -1131 -() -1129 -() -1128 -() -1127 +91 () -1126 +40 *** -1301 -() -1125 -() -1124 -() -1123 -() -1122 +96 () -1120 +38 *** -1332 +100 () -1119 +37 *** -1737 +149 () -1118 +36 *** -1718 +74 () -1117 -*** -1250 +35 *** -1658 -() -1116 -() -1114 -() -1113 -() -1112 +258 () -1111 +34 *** -1772 +151 () -1110 +33 *** -1359 +85 () -1109 +32 () -1108 +31 *** -1251 -() -1106 +94 () -1105 +30 *** -1771 -() -1104 -() -1102 -() -1101 -() -1100 +97 () -1099 +29 *** -1689 -() -1098 +90 () -1097 +28 *** -1785 +89 () -1096 +27 *** -1685 -() -1095 -() -1094 -() -1093 -() -1092 -() -1091 -() -1090 -() -1089 -() -1088 -() -1087 -() -1086 -() -1085 +92 () -1084 +26 *** -1739 -() -1083 +72 *** -1405 -() -1082 -() -1081 -() -1080 -() -1078 -() -1077 -() -1076 -() -1075 -() -1074 -() -1073 -() -1072 -() -1071 +247 () -1070 +25 *** -1707 +86 () -1069 +24 *** -1334 -() -1068 -() -1066 -() -1065 -() -1064 -() -1063 -() -1062 -() -1061 -() -1060 -() -1059 -() -1058 +82 () -1057 +23 *** -1744 -() -1056 
-() -1055 -() -1054 +87 *** -1335 +117 () -1052 +22 *** -1660 -() -1051 -() -1050 -() -1049 -() -1048 -() -1047 -() -1046 -() -1045 +76 *** -1357 +119 () -1044 +21 *** -1659 -() -1043 -() -1041 -() -1040 -() -1039 -() -1038 -() -1037 -() -1036 -() -1035 -() -1034 +84 () -1033 +20 *** -1690 -() -1031 -() -1030 -() -1029 +78 () -1028 +19 *** -1675 -() -1027 -() -1026 +73 () -1025 +18 *** -1257 -() -1024 -() -1023 -() -1022 -() -1021 -() -1020 +81 () -1019 +17 *** -1284 +65 () -1018 +16 +*** +63 +*** +101 () -1017 +15 *** -1754 +71 () -1016 +14 +*** +75 () -1015 +13 *** -1247 +322 () -1014 +12 +*** +77 () -1013 +11 +*** +283 () -1012 +10 *** -1319 +79 () -1011 +9 *** -1352 +145 *** -1651 -() -1010 +150 () -1009 +8 *** -1705 -() -1008 -() -1007 -() -1006 +67 () -1005 +7 *** -1679 -() -1004 -() -1003 -() -1002 -() -1001 -() -1000 +60 *** -1731 -() -999 -() -998 -() -996 -() -995 -() -994 -() -993 +231 () -991 +6 *** -1799 -() -990 -() -989 -() -987 -() -986 -() -985 -() -984 -() -983 +56 *** -1745 -() -982 +234 () -981 +5 *** -1644 -() -980 -() -979 -() -978 -() -977 -() -976 -() -975 -() -974 +164 *** -1222 -() -973 -() -972 -() -971 -() -970 -() -968 -() -967 -() -966 +202 () -965 +4 *** -1347 -() -964 -() -963 +53 () -962 +3 *** -1743 -() -961 +130 *** -1719 -() -960 +185 *** -1758 -() -959 +200 () -958 +2 *** -1733 -() -957 -*** -1775 -() -956 -() -955 -() -954 -() -953 -() -952 -*** -1393 -() -951 -() -950 -() -949 -*** -1669 -() -948 -() -947 -() -946 -*** -1681 -() -944 -*** -1686 -() -943 -() -942 -() -940 -*** -1783 -() -939 -() -938 -() -937 -() -936 -() -934 -() -933 -() -932 -() -931 -() -930 -() -929 -*** -1713 -() -928 -*** -1725 -() -927 -() -926 -() -925 -() -924 -() -923 -() -922 -() -921 -*** -1394 -() -920 -*** -1741 -() -919 -*** -1708 -() -918 -() -917 -() -916 -*** -1723 -() -915 -() -914 -() -913 -() -912 -() -911 -() -910 -() -909 -*** -1795 -() -908 -() -907 -() -906 -() -905 -() -904 -() -903 -*** -1330 -() -902 -() -901 -() -900 -() -899 -() 
-898 -() -897 -*** -1790 -() -896 -*** -1652 -() -895 -*** -1761 -() -894 -() -893 -() -892 -() -891 -() -890 -*** -1253 -() -889 -*** -1698 -() -888 -() -887 -() -885 -() -884 -*** -1703 -() -883 -() -882 -() -881 -*** -1747 -() -880 -() -879 -*** -1647 -() -878 -*** -1358 -() -877 -*** -1407 -() -876 -() -875 -() -874 -*** -1283 -() -873 -*** -1682 -() -872 -() -871 -() -870 -() -869 -() -868 -() -867 -*** -1751 -() -866 -() -865 -() -864 -() -863 -() -862 -*** -1753 -() -861 -() -860 -() -859 -() -858 -*** -1348 -() -857 -() -856 -*** -1350 -() -855 -*** -1252 -() -854 -() -853 -*** -1201 -() -852 -() -851 -() -850 -*** -1361 -() -849 -() -848 -() -847 -() -846 -() -845 -() -844 -() -843 -() -842 -() -841 -() -840 -() -839 -*** -1360 -() -838 -() -837 -() -836 -() -835 -() -834 -() -833 -*** -1406 -() -832 -() -831 -() -830 -() -829 -() -827 -() -826 -() -825 -() -824 -() -823 -() -822 -() -821 -*** -1683 -() -820 -*** -1672 -() -819 -() -818 -*** -1693 -() -816 -() -815 -*** -1313 -() -814 -() -813 -() -812 -*** -1727 -() -811 -() -810 -() -809 -() -808 -() -806 -() -805 -*** -1217 -() -804 -() -803 -() -802 -() -801 -() -800 -() -799 -() -798 -() -797 -*** -1220 -() -796 -*** -1788 -() -795 -() -794 -*** -1255 -*** -1674 -() -793 -*** -1740 -() -792 -() -791 -*** -1349 -() -790 -() -789 -() -788 -() -787 -*** -1800 -() -786 -() -785 -() -784 -() -783 -() -782 -() -781 -() -780 -() -779 -() -778 -() -777 -() -776 -() -775 -() -774 -*** -1331 -() -773 -() -772 -*** -1256 -() -771 -() -770 -() -769 -() -768 -() -767 -() -766 -() -765 -() -764 -() -763 -() -762 -() -761 -() -759 -() -758 -*** -1655 -() -757 -() -756 -() -755 -*** -1760 -() -754 -() -753 -() -752 -() -751 -*** -1285 -*** -1680 -() -750 -*** -1261 -() -749 -() -748 -() -747 -() -746 -() -745 -*** -1362 -() -744 -() -743 -() -742 -() -741 -() -740 -() -739 -() -738 -() -737 -() -736 -*** -1729 -() -735 -*** -1769 -() -734 -() -733 -() -732 -*** -1715 -() -731 -() -730 -() -729 -() -728 -() -727 -*** 
-1721 -() -726 -() -725 -() -724 -() -723 -() -722 -() -721 -() -720 -() -719 -*** -1770 -() -718 -() -717 -() -716 -() -715 -() -714 -() -713 -() -712 -() -711 -*** -1779 -() -710 -*** -1221 -() -709 -() -708 -() -707 -() -706 -() -705 -*** -1661 -() -704 -() -703 -() -702 -() -701 -*** -1722 -() -700 -() -699 -() -698 -() -697 -() -696 -() -695 -() -694 -() -693 -() -692 -*** -1776 -() -690 -*** -1254 -() -689 -*** -1738 -() -688 -() -687 -() -686 -*** -1287 -() -685 -() -684 -() -683 -() -682 -() -681 -*** -1666 -() -680 -() -679 -() -678 -() -677 -() -676 -() -675 -() -674 -*** -1695 -() -673 -*** -1709 -() -672 -() -671 -() -670 -() -669 -() -667 -() -666 -() -665 -() -664 -() -663 -() -662 -() -661 -*** -1730 -() -660 -() -659 -() -658 -() -657 -() -656 -() -655 -() -654 -() -653 -() -652 -() -651 -() -650 -() -649 -() -648 -() -647 -() -594 -610 -622 -() -588 -() -584 -601 -615 -*** -1266 -() -578 -590 -603 -() -574 -592 -607 -*** -1646 -() -568 -() -564 -582 -598 -() -558 -570 -*** -1351 -*** -1712 -() -554 -572 -() -547 -560 -580 -() -543 -562 -() -536 -549 -() -533 -551 -*** -1356 -() -527 -539 -() -524 -541 -() -518 -530 -() -514 -531 -() -508 -521 -*** -1657 -() -503 -523 -() -498 -*** -1383 -() -493 -512 -*** -1422 -() -487 -501 -() -484 -515 -*** -1354 -*** -1701 -() -481 -502 -() -475 -490 -511 -() -472 -504 -538 -566 -589 -613 -629 -() -470 -491 -*** -1303 -() -464 -() -461 -494 -526 -556 -579 -605 -623 -639 -() -450 -*** -1355 -() -438 -483 -516 -545 -569 -596 -616 -633 -() -426 -471 -506 -535 -559 -586 -608 -627 -643 -*** -1259 -() -414 -459 -495 -525 -548 -576 -599 -620 -635 -*** -1765 -() -402 -449 -500 -() -401 -446 -482 -*** -1258 -() -391 -418 -434 -455 -() -388 -435 -469 -() -384 -407 -429 -454 -() -378 -406 -447 -467 -() -376 -423 -457 -*** -1316 -() -373 -394 -416 -442 -() -367 -393 -410 -431 -452 -478 -() -366 -413 -465 -513 -550 -585 -617 -638 -() -364 -*** -1146 -*** -1750 -() -363 -411 -445 -() -359 -396 -*** -1396 -*** -1756 -() -357 
-381 -405 -430 -458 -479 -*** -1353 -() -351 -368 -() -350 -389 -*** -1103 -() -349 -397 -433 -() -344 -369 -422 -443 -() -338 -354 -380 -398 -419 -441 -466 -() -335 -385 -421 -() -332 -355 -*** -1320 -() -327 -375 -428 -505 -540 -575 -609 -632 -*** -1321 -() -326 -341 -*** -1182 -() -323 -372 -409 -() -319 -342 -() -318 -331 -343 -356 -370 -382 -395 -408 -420 -432 -444 -456 -468 -480 -492 -() -312 -*** -1161 -() -309 -346 -383 -*** -1366 -() -308 -*** -1262 -() -305 -330 -() -299 -315 -*** -1333 -*** -1676 -() -293 -317 -() -289 -296 -334 -371 -*** -1158 -() -286 -302 -329 -() -281 -303 -*** -1219 -() -280 -292 -304 -316 -*** -1264 -() -275 -290 -() -270 -291 -() -265 -278 -*** -1184 -() -260 -279 -() -255 -268 -*** -1367 -() -250 -269 -*** -1165 -() -245 -*** -1115 -() -240 -259 -*** -1067 -() -235 -248 -*** -1199 -*** -1717 -() -230 -249 -() -225 -238 -*** -1197 -() -220 -239 -() -215 -*** -935 -() -210 -229 -258 -*** -1193 -() -205 -*** -988 -() -200 -219 -() -195 -*** -1166 -*** -1667 -() -190 -209 -*** -1079 -*** -1249 -() -185 -198 -*** -1180 -() -131 -161 -192 -221 -252 -282 -320 -() -118 -151 -182 -211 -242 -271 -306 -*** -1398 -() -112 -127 -140 -*** -1148 -() -105 -141 -172 -201 -232 -261 -294 -() -103 -*** -1144 -() -92 -130 -162 -191 -222 -251 -283 -321 -358 -() -91 -*** -886 -() -80 -136 -174 -216 -254 -301 -348 -404 -473 -520 -555 -591 -619 -() -79 -117 -152 -181 -212 -241 -272 -307 -345 -*** -1267 -() -78 -116 -*** -1042 -*** -1764 -() -74 -87 -100 -114 -126 -() -73 -95 -111 -128 -149 -165 -178 -*** -997 -() -70 -119 -166 -204 -246 -285 -339 -386 -439 -485 -532 -557 -583 -606 -625 -640 -646 -() -66 -104 -142 -171 -202 -231 -262 -295 -333 -*** -1286 -() -62 -86 -108 -124 -139 -159 -175 -188 -*** -1130 -() -61 -72 -88 -113 -134 -148 -160 -179 -208 -228 -*** -1245 -() -57 -106 -157 -193 -236 -273 -328 -374 -427 -474 -519 -552 -() -56 -*** -969 -() -55 -109 -153 -197 -233 -277 -325 -377 -424 -476 -517 -553 -577 -602 -621 -637 -645 -() -54 -110 -154 -196 
-234 -276 -324 -379 -425 -477 -522 -561 -595 -624 -642 -() -53 -90 -129 -*** -1190 -() -52 -*** -941 -() -50 -59 -75 -99 -121 -137 -150 -169 -*** -945 -*** -1706 -() -49 -69 -85 -101 -125 -145 -158 -170 -189 -218 -*** -992 -*** -1781 -() -48 -68 -122 -163 -207 -244 -288 -336 -390 -436 -489 -529 -() -45 -96 -143 -187 -223 -267 -310 -360 -*** -1409 -() -41 -60 -82 -98 -115 -138 -155 -168 -180 -199 -() -39 -67 -123 -164 -206 -243 -287 -337 -392 -437 -488 -534 -571 -604 -630 -() -36 -43 -*** -1170 -() -26 -*** -1107 -() -24 -40 -*** -817 -() -20 -46 -97 -144 -186 -224 -266 -311 -365 -412 -463 -507 -542 -567 -593 -614 -631 -() -19 -33 -*** -1185 -*** -1694 -() -18 -44 -94 -146 -184 -226 -263 -314 -361 -415 -460 -509 -546 -573 -597 -618 -634 -644 -() -17 -31 -65 -102 -*** -807 -() -16 -34 -84 -133 -177 -213 -256 -298 -352 -400 -453 -496 -() -14 -37 -81 -135 -173 -217 -253 -300 -347 -403 -448 -499 -537 -563 -587 -611 -628 -641 -() -13 -22 -42 -*** -691 -() -12 -47 -93 -147 -183 -227 -264 -313 -362 -417 -462 -510 -544 -581 -612 -636 -() -11 -29 -*** -760 -() -10 -30 -63 -*** -1121 -() -9 -35 -83 -132 -176 -214 -257 -297 -353 -399 -451 -497 -*** -1304 -() -8 -25 -64 -*** -828 -() -7 -23 -51 -89 -*** -1174 -*** -1300 -() -6 -28 -71 -120 -167 -203 -247 -284 -340 -387 -440 -486 -528 -565 -600 -626 -() -5 -*** -668 -() -4 -32 -77 -*** -1032 -() -3 -15 -38 -76 -*** -1314 -() -2 -27 -*** -1053 +205 () 1 -21 -58 -107 -156 -194 -237 -274 -322 *** -1151 +39 *** -1777 +95 diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout index bd861b307..448df62f5 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout +++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout @@ -4,19 +4,15 @@ by Andreas Loebel Copyright (c) 1998,1999 ZIB Berlin All Rights Reserved. 
-nodes : 1800 -active arcs : 8190 -simplex iterations : 6837 -flow value : 12860044181 -new implicit arcs : 300000 -active arcs : 308190 -simplex iterations : 11843 -flow value : 9360043604 -new implicit arcs : 22787 -active arcs : 330977 -simplex iterations : 11931 -flow value : 9360043512 -checksum : 798014 +nodes : 500 +active arcs : 1905 +simplex iterations : 1502 +flow value : 4990014995 +new implicit arcs : 23867 +active arcs : 25772 +simplex iterations : 2663 +flow value : 3080014995 +checksum : 68389 optimal M5 Simulator System @@ -25,9 +21,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 13:02:31 -M5 started Tue May 15 14:23:47 2007 +M5 compiled Jun 21 2007 21:15:48 +M5 started Fri Jun 22 01:58:18 2007 M5 executing on zizzer.eecs.umich.edu command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic tests/run.py long/10.mcf/sparc/linux/simple-atomic Global frequency set at 1000000000000 ticks per second -Exiting @ tick 859797266500 because target called exit() +Exiting @ tick 121920085500 because target called exit() diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini index 9beb527ea..fe99eeeb9 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini +++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini @@ -31,6 +31,7 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -44,7 +45,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -69,6 +70,7 @@ mem_side=system.cpu.toL2Bus.port[1] [system.cpu.icache] type=BaseCache adaptive_compression=false 
+addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -82,7 +84,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -107,6 +109,7 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -120,7 +123,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=100000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -160,7 +163,7 @@ env= euid=100 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf gid=100 -input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in +input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in output=cout pid=100 ppid=99 @@ -174,7 +177,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.l2cache.mem_side +port=system.physmem.port[0] system.cpu.l2cache.mem_side [system.physmem] type=PhysicalMemory diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out index 5d5cc71c1..81e06c995 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out +++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out @@ -26,7 +26,7 @@ block_size=64 type=LiveProcess cmd=mcf mcf.in executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf -input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in +input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in output=cout env= cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing @@ -94,7 +94,7 @@ prefetch_access=false prefetcher_size=100 prefetch_past_page=false prefetch_serial_squash=false -prefetch_latency=10 +prefetch_latency=10000 prefetch_degree=1 prefetch_policy=none 
prefetch_cache_check_push=true @@ -131,7 +131,7 @@ prefetch_access=false prefetcher_size=100 prefetch_past_page=false prefetch_serial_squash=false -prefetch_latency=10 +prefetch_latency=10000 prefetch_degree=1 prefetch_policy=none prefetch_cache_check_push=true @@ -168,7 +168,7 @@ prefetch_access=false prefetcher_size=100 prefetch_past_page=false prefetch_serial_squash=false -prefetch_latency=10 +prefetch_latency=100000 prefetch_degree=1 prefetch_policy=none prefetch_cache_check_push=true diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt b/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt index c95331047..56d2d33b9 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt +++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt @@ -1,77 +1,77 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 480485 # Simulator instruction rate (inst/s) -host_mem_usage 155316 # Number of bytes of host memory used -host_seconds 3578.87 # Real time elapsed on the host -host_tick_rate 745845171 # Simulator tick rate (ticks/s) +host_inst_rate 697152 # Simulator instruction rate (inst/s) +host_mem_usage 155896 # Number of bytes of host memory used +host_seconds 349.77 # Real time elapsed on the host +host_tick_rate 1027373651 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 1719594534 # Number of instructions simulated -sim_seconds 2.669285 # Number of seconds simulated -sim_ticks 2669284585000 # Number of ticks simulated -system.cpu.dcache.ReadReq_accesses 607807189 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 12893.226605 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 11893.226605 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 594739458 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 168485217000 # number of ReadReq miss cycles 
-system.cpu.dcache.ReadReq_miss_rate 0.021500 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 13067731 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 155417486000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.021500 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 13067731 # number of ReadReq MSHR misses -system.cpu.dcache.SwapReq_accesses 15448 # number of SwapReq accesses(hits+misses) -system.cpu.dcache.SwapReq_avg_miss_latency 13090.909091 # average SwapReq miss latency -system.cpu.dcache.SwapReq_avg_mshr_miss_latency 12090.909091 # average SwapReq mshr miss latency -system.cpu.dcache.SwapReq_hits 15437 # number of SwapReq hits -system.cpu.dcache.SwapReq_miss_latency 144000 # number of SwapReq miss cycles -system.cpu.dcache.SwapReq_miss_rate 0.000712 # miss rate for SwapReq accesses -system.cpu.dcache.SwapReq_misses 11 # number of SwapReq misses -system.cpu.dcache.SwapReq_mshr_miss_latency 133000 # number of SwapReq MSHR miss cycles -system.cpu.dcache.SwapReq_mshr_miss_rate 0.000712 # mshr miss rate for SwapReq accesses -system.cpu.dcache.SwapReq_mshr_misses 11 # number of SwapReq MSHR misses -system.cpu.dcache.WriteReq_accesses 166970997 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 12404.292450 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11404.292450 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 165264000 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 21174090000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.010223 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 1706997 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 19467093000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.010223 # mshr miss rate for WriteReq accesses 
-system.cpu.dcache.WriteReq_mshr_misses 1706997 # number of WriteReq MSHR misses +sim_insts 243840172 # Number of instructions simulated +sim_seconds 0.359341 # Number of seconds simulated +sim_ticks 359340764000 # Number of ticks simulated +system.cpu.dcache.ReadReq_accesses 82219469 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 12000.343864 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 11000.343864 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 81326673 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 10713859000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.010859 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 892796 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 9821063000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.010859 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 892796 # number of ReadReq MSHR misses +system.cpu.dcache.SwapReq_accesses 3886 # number of SwapReq accesses(hits+misses) +system.cpu.dcache.SwapReq_avg_miss_latency 12500 # average SwapReq miss latency +system.cpu.dcache.SwapReq_avg_mshr_miss_latency 11500 # average SwapReq mshr miss latency +system.cpu.dcache.SwapReq_hits 3882 # number of SwapReq hits +system.cpu.dcache.SwapReq_miss_latency 50000 # number of SwapReq miss cycles +system.cpu.dcache.SwapReq_miss_rate 0.001029 # miss rate for SwapReq accesses +system.cpu.dcache.SwapReq_misses 4 # number of SwapReq misses +system.cpu.dcache.SwapReq_mshr_miss_latency 46000 # number of SwapReq MSHR miss cycles +system.cpu.dcache.SwapReq_mshr_miss_rate 0.001029 # mshr miss rate for SwapReq accesses +system.cpu.dcache.SwapReq_mshr_misses 4 # number of SwapReq MSHR misses +system.cpu.dcache.WriteReq_accesses 22901836 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 
12623.899964 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11623.899964 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 22855133 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 589574000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.002039 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 46703 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 542871000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.002039 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 46703 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 51.440428 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 110.894471 # Average number of references to valid blocks. 
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 774778186 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 12836.737637 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 11836.737637 # average overall mshr miss latency -system.cpu.dcache.demand_hits 760003458 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 189659307000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.019070 # miss rate for demand accesses -system.cpu.dcache.demand_misses 14774728 # number of demand (read+write) misses +system.cpu.dcache.demand_accesses 105121305 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 12031.341172 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 11031.341172 # average overall mshr miss latency +system.cpu.dcache.demand_hits 104181806 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 11303433000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.008937 # miss rate for demand accesses +system.cpu.dcache.demand_misses 939499 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 174884579000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.019070 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 14774728 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 
10363934000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.008937 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 939499 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 774778186 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 12836.737637 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 11836.737637 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 105121305 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 12031.341172 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 11031.341172 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 760003458 # number of overall hits -system.cpu.dcache.overall_miss_latency 189659307000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.019070 # miss rate for overall accesses -system.cpu.dcache.overall_misses 14774728 # number of overall misses +system.cpu.dcache.overall_hits 104181806 # number of overall hits +system.cpu.dcache.overall_miss_latency 11303433000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.008937 # miss rate for overall accesses +system.cpu.dcache.overall_misses 939499 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 174884579000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.019070 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 
14774728 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 10363934000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.008937 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 939499 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -83,57 +83,57 @@ system.cpu.dcache.prefetcher.num_hwpf_issued 0 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.dcache.replacements 14770643 # number of replacements -system.cpu.dcache.sampled_refs 14774739 # Sample count of references to valid blocks. +system.cpu.dcache.replacements 935407 # number of replacements +system.cpu.dcache.sampled_refs 939503 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 4094.628585 # Cycle average of tags in use -system.cpu.dcache.total_refs 760018895 # Total number of references to valid blocks. -system.cpu.dcache.warmup_cycle 3913237000 # Cycle when the warmup percentage was hit. 
-system.cpu.dcache.writebacks 4191356 # number of writebacks -system.cpu.icache.ReadReq_accesses 1719594535 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 13991.120977 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 12991.120977 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 1719593634 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 12606000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 901 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 11705000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 901 # number of ReadReq MSHR misses +system.cpu.dcache.tagsinuse 3560.887601 # Cycle average of tags in use +system.cpu.dcache.total_refs 104185688 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 134116230000 # Cycle when the warmup percentage was hit. 
+system.cpu.dcache.writebacks 94807 # number of writebacks +system.cpu.icache.ReadReq_accesses 243840173 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 13993.174061 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 12993.174061 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 243839294 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 12300000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.000004 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 879 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 11421000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.000004 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 879 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 1908538.994451 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 277405.340159 # Average number of references to valid blocks. 
system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 1719594535 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 13991.120977 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 12991.120977 # average overall mshr miss latency -system.cpu.icache.demand_hits 1719593634 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 12606000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses -system.cpu.icache.demand_misses 901 # number of demand (read+write) misses +system.cpu.icache.demand_accesses 243840173 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 13993.174061 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 12993.174061 # average overall mshr miss latency +system.cpu.icache.demand_hits 243839294 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 12300000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.000004 # miss rate for demand accesses +system.cpu.icache.demand_misses 879 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 11705000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 901 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_miss_latency 11421000 # number of demand 
(read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.000004 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 879 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 1719594535 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 13991.120977 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 12991.120977 # average overall mshr miss latency +system.cpu.icache.overall_accesses 243840173 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 13993.174061 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 12993.174061 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 1719593634 # number of overall hits -system.cpu.icache.overall_miss_latency 12606000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses -system.cpu.icache.overall_misses 901 # number of overall misses +system.cpu.icache.overall_hits 243839294 # number of overall hits +system.cpu.icache.overall_miss_latency 12300000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.000004 # miss rate for overall accesses +system.cpu.icache.overall_misses 879 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 11705000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 901 # number of overall MSHR misses 
+system.cpu.icache.overall_mshr_miss_latency 11421000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.000004 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 879 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -145,64 +145,60 @@ system.cpu.icache.prefetcher.num_hwpf_issued 0 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.icache.replacements 31 # number of replacements -system.cpu.icache.sampled_refs 901 # Sample count of references to valid blocks. +system.cpu.icache.replacements 25 # number of replacements +system.cpu.icache.sampled_refs 879 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 737.715884 # Cycle average of tags in use -system.cpu.icache.total_refs 1719593634 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 716.200092 # Cycle average of tags in use +system.cpu.icache.total_refs 243839294 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0 # Percentage of idle cycles -system.cpu.l2cache.ReadReq_accesses 14775639 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 12999.785859 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 10999.785859 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_hits 8592784 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 80375791000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 0.418449 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 6182855 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 68010081000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.418449 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 6182855 # number of ReadReq MSHR misses -system.cpu.l2cache.Writeback_accesses 4191356 # number of Writeback accesses(hits+misses) -system.cpu.l2cache.Writeback_hits 4164131 # number of Writeback hits -system.cpu.l2cache.Writeback_miss_rate 0.006496 # miss rate for Writeback accesses -system.cpu.l2cache.Writeback_misses 27225 # number of Writeback misses -system.cpu.l2cache.Writeback_mshr_miss_rate 0.006496 # mshr miss rate for Writeback accesses -system.cpu.l2cache.Writeback_mshr_misses 27225 # number of Writeback MSHR misses +system.cpu.l2cache.ReadReq_accesses 940381 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 13000 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 11000 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 924777 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 202852000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.016593 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 15604 # number of ReadReq misses 
+system.cpu.l2cache.ReadReq_mshr_miss_latency 171644000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.016593 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 15604 # number of ReadReq MSHR misses +system.cpu.l2cache.Writeback_accesses 94807 # number of Writeback accesses(hits+misses) +system.cpu.l2cache.Writeback_hits 94807 # number of Writeback hits system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 2.063273 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 65.341195 # Average number of references to valid blocks. system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 14775639 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 12999.785859 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 10999.785859 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 8592784 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 80375791000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 0.418449 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 6182855 # number of demand (read+write) misses +system.cpu.l2cache.demand_accesses 940381 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 
11000 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 924777 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 202852000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.016593 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 15604 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 68010081000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 0.418449 # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 6182855 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_miss_latency 171644000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.016593 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 15604 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 18966995 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 12942.794779 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 10999.785859 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 1035188 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 12756915 # number of overall hits -system.cpu.l2cache.overall_miss_latency 80375791000 # 
number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 0.327415 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 6210080 # number of overall misses +system.cpu.l2cache.overall_hits 1019584 # number of overall hits +system.cpu.l2cache.overall_miss_latency 202852000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.015074 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 15604 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 68010081000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 0.325980 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 6182855 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_miss_latency 171644000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.015074 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 15604 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -214,17 +210,17 @@ system.cpu.l2cache.prefetcher.num_hwpf_issued 0 system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.l2cache.replacements 6150087 # number of replacements -system.cpu.l2cache.sampled_refs 6182855 # Sample count of references to valid blocks. 
+system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.sampled_refs 15604 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 26129.060966 # Cycle average of tags in use -system.cpu.l2cache.total_refs 12756915 # Total number of references to valid blocks. -system.cpu.l2cache.warmup_cycle 806915893000 # Cycle when the warmup percentage was hit. -system.cpu.l2cache.writebacks 1069081 # number of writebacks +system.cpu.l2cache.tagsinuse 10833.027960 # Cycle average of tags in use +system.cpu.l2cache.total_refs 1019584 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 2669284585000 # number of cpu cycles simulated -system.cpu.num_insts 1719594534 # Number of instructions executed -system.cpu.num_refs 774793634 # Number of memory references -system.cpu.workload.PROG:num_syscalls 632 # Number of system calls +system.cpu.numCycles 359340764000 # number of cpu cycles simulated +system.cpu.num_insts 243840172 # Number of instructions executed +system.cpu.num_refs 105125191 # Number of memory references +system.cpu.workload.PROG:num_syscalls 428 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out b/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out index 6bbb02cf0..095132477 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out +++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out @@ -1,3092 +1,999 @@ () -1642 -*** -1759 -() -1641 -*** -1691 +500 () -1640 +499 () -1639 +498 () -1638 +496 () -1637 +495 () -1636 +494 () -1635 +493 () -1634 +492 () -1633 +491 () -1632 +490 () -1631 +489 () -1630 
+488 () -1629 +487 () -1628 +486 () -1627 +484 () -1626 +482 () -1625 -*** -1784 +481 () -1624 +480 () -1623 +479 () -1622 -*** -1688 +478 () -1621 +477 () -1618 +476 () -1617 -*** -1796 +475 () -1616 +474 () -1615 -*** -1668 +473 () -1614 +472 () -1613 +471 () -1612 -*** -1700 +469 () -1611 +468 () -1610 +467 () -1608 +466 () -1606 +465 () -1605 +464 () -1604 +463 () -1603 +462 () -1602 +461 () -1601 +460 () -1599 +459 () -1598 -*** -1714 +458 () -1597 +457 () -1595 +455 () -1591 +454 () -1590 -*** -1773 +452 () -1589 +451 () -1588 +450 () -1587 -*** -1710 +449 () -1586 +448 () -1585 +446 () -1584 -*** -1748 +445 () -1583 -*** -1648 +444 () -1582 +443 () -1581 -*** -1757 +442 () -1579 +440 () -1578 -*** -1726 +439 () -1575 -*** -1763 +438 () -1574 +436 () -1573 +435 () -1572 +433 () -1571 +432 () -1568 +431 () -1567 +428 () -1565 -*** -1643 +427 () -1564 +425 () -1563 +424 () -1562 +423 () -1559 +420 () -1557 +419 () -1556 +416 () -1555 +414 () -1554 +413 () -1553 -*** -1684 +412 () -1552 +407 () -1551 -*** -1697 +406 () -1549 +405 () -1546 -*** -1768 +404 () -1544 -*** -1798 +403 () -1542 +402 () -1541 -*** -1650 +401 () -1540 +400 () -1539 +399 () -1538 +398 () -1536 +396 () -1534 +395 () -1533 +393 () -1532 +392 () -1529 +390 () -1528 +389 () -1527 +388 () -1526 +387 () -1525 +386 () -1524 -*** -1736 +385 () -1523 +384 () -1522 -*** -1794 +383 () -1521 +382 () -1519 +381 () -1517 -*** -1687 +380 () -1516 +379 () -1515 +377 () -1514 +375 () -1513 +374 () -1512 +373 () -1511 +372 () -1510 +371 () -1509 +370 () -1508 +369 () -1507 +368 () -1506 +366 () -1505 +365 () -1504 +364 () -1503 +362 () -1502 -*** -1746 +361 () -1501 -*** -1766 +360 () -1498 +359 () -1497 +358 () -1495 +357 () -1494 +356 () -1493 -*** -1673 +355 () -1490 -*** -1774 +354 () -1486 +352 () -1485 +350 () -1482 +347 () -1481 +344 () -1480 +342 () -1479 +341 () -1477 +340 () -1476 +339 () -1475 +338 () -1473 +332 () -1472 +325 () -1471 +320 *** -1728 -() -1470 -() -1469 -() -1467 -() -1466 -() 
-1465 -() -1464 -() -1463 -() -1462 -() -1461 -() -1460 -() -1459 -() -1455 +345 () -1454 +319 *** -1782 -() -1453 -() -1452 -() -1451 +497 () -1449 +318 *** -1732 -() -1448 -() -1445 -() -1444 -() -1442 -() -1441 -() -1440 -() -1438 -() -1437 -() -1435 -() -1433 +349 () -1432 +317 *** -1665 -() -1431 -() -1426 -() -1425 -() -1424 -() -1423 +408 () -1420 +316 *** -1499 +324 () -1419 -*** -1457 +315 *** -1653 +328 () -1418 -*** -1577 +314 *** -1664 +335 () -1417 +313 *** -1489 +378 () -1416 +312 *** -1545 +426 () -1415 +311 *** -1430 +411 () -1414 +304 *** -1434 +343 () -1413 -*** -1594 +303 *** -1735 +417 () -1412 -*** -1560 +302 *** -1724 +485 () -1411 +301 *** -1428 +363 () -1404 -*** -1496 +300 *** -1780 +376 () -1403 +299 *** -1561 +333 () -1402 +292 *** -1548 +337 () -1401 -*** -1569 +291 *** -1792 +409 () -1400 +290 *** -1537 +421 () -1399 +289 *** -1429 +437 () -1392 +288 *** -1580 +430 () -1391 +287 *** -1410 +348 () -1390 +286 *** -1500 +326 () -1389 -*** -1483 +284 () -1388 +282 *** -1570 +308 () -1387 +279 *** -1543 -() -1386 +297 *** -1558 -() -1385 +305 () -1384 +278 () -1382 +277 *** -1439 +307 () -1381 +276 *** -1677 +296 () -1380 +273 () -1378 -*** -1397 +271 () -1377 -*** -1787 +265 () -1376 +246 *** -1408 -() -1375 -() -1374 +267 () -1373 +245 *** -1671 -() -1372 +280 () -1370 +244 *** -1793 +391 () -1369 +243 +*** +330 () -1365 +242 *** -1762 +456 () -1346 +241 +*** +346 () -1345 +240 *** -1566 +483 () -1344 +239 *** -1520 +260 () -1343 +238 *** -1492 +261 () -1342 +237 *** -1576 +262 *** -1656 +294 () -1341 +236 *** -1447 +253 () -1340 +229 *** -1550 -() -1339 -() -1338 -() -1337 +397 () -1329 +228 *** -1336 +298 () -1328 +227 *** -1446 +415 () -1327 +226 *** -1607 -() -1325 -() -1324 -() -1323 -() -1317 -() -1315 +264 () -1311 +224 *** -1450 +232 +() +222 *** -1720 +233 () -1310 +217 *** -1619 +250 () -1309 +211 *** -1458 +331 () -1308 +210 +*** +394 () -1307 +209 *** -1427 +410 () -1306 +208 *** -1364 +321 +() +207 *** -1696 +327 () -1299 +206 
+*** +309 () -1297 +199 *** -1395 +259 () -1296 +198 +*** +219 () -1295 +197 *** -1326 +220 () -1294 +195 *** -1371 +429 () -1293 +194 *** -1456 +470 () -1292 +193 *** -1312 +274 () -1291 +191 +*** +203 () -1290 +190 *** -1363 +263 () -1282 +189 +215 *** -1592 +230 () -1281 +188 +*** +266 *** -1379 +295 () -1280 +182 *** -1478 +329 () -1279 +181 *** -1436 +351 () -1278 +180 *** -1620 +441 () -1277 +179 *** -1487 +453 () -1276 +178 *** -1288 +418 () -1275 +177 *** -1596 +353 () -1274 +176 *** -1322 +422 () -1273 +175 *** -1305 +225 *** -1699 +255 () -1272 +174 +*** +269 () -1271 +173 *** -1484 +214 () -1270 +172 *** -1518 +186 () -1269 +171 *** -1289 +447 () -1268 +170 *** -1443 +270 *** -1786 +306 () -1265 +169 +*** +336 () -1243 +168 *** -1368 +285 () -1242 +165 +*** +249 () -1241 +146 *** -1421 +154 +() +143 *** -1749 +334 () -1240 +142 *** -1260 +216 *** -1678 -() -1239 -() -1238 +257 () -1236 +141 *** -1263 +167 *** -1767 +251 () -1235 +140 +*** +162 +*** +293 () -1234 +139 +*** +158 () -1233 +137 +*** +166 +*** +201 () -1232 +136 *** -1752 +160 () -1231 +134 *** -1791 +221 () -1230 +132 +*** +213 () -1229 +131 +*** +187 () -1228 +129 *** -1702 +235 () -1227 +128 +*** +153 () -1226 +127 +*** +156 () -1225 +126 +*** +159 +*** +218 () -1224 +125 +*** +155 () -1223 +124 +*** +157 () -1216 +123 *** -1531 +152 () -1215 +116 *** -1530 +135 *** -1797 +163 () -1214 +115 *** -1474 +133 *** -1742 -() -1213 +204 *** -1488 +248 () -1212 +114 *** -1298 +192 *** -1789 +212 () -1211 +113 *** -1491 +268 () -1210 +112 *** -1600 +367 () -1209 +111 *** -1244 +272 () -1208 -*** -1609 +110 *** -1704 +434 () -1207 +109 *** -1237 +323 () -1206 +108 *** -1468 +281 () -1205 +107 +*** +144 *** -1547 +148 () -1204 +106 *** -1246 +275 () -1203 +105 *** -1593 +196 *** -1734 +254 () -1202 +104 *** -1535 -() -1200 -() -1198 -() -1196 -() -1195 +138 +*** +161 () -1194 +103 *** -1302 +310 () -1192 +102 +*** +223 +*** +252 () -1191 +80 () -1189 +70 () -1188 +69 () -1187 +68 () -1186 +66 () 
-1183 +64 () -1181 +62 *** -1778 -() -1179 +256 () -1178 +61 +*** +93 () -1177 +59 *** -1645 +120 () -1176 +58 () -1175 -*** -1318 +57 *** -1649 +183 () -1173 +55 () -1172 +54 () -1171 -() -1169 +52 *** -1654 +147 () -1168 +51 *** -1692 -() -1167 -() -1164 -() -1163 +118 () -1162 +50 *** -1716 -() -1160 +83 () -1159 +49 *** -1663 -() -1157 -() -1156 -() -1155 -() -1154 -() -1153 -() -1152 -() -1150 -() -1149 -() -1147 -() -1145 +98 () -1143 +48 *** -1711 -() -1142 +99 () -1141 +47 () -1140 +46 +*** +184 () -1139 +45 *** -1755 +121 () -1138 +44 () -1137 +43 *** -1218 +88 () -1136 -*** -1248 +42 *** -1670 -() -1135 -() -1134 +122 () -1133 +41 *** -1662 -() -1132 -() -1131 -() -1129 -() -1128 -() -1127 +91 () -1126 +40 *** -1301 -() -1125 -() -1124 -() -1123 -() -1122 +96 () -1120 +38 *** -1332 +100 () -1119 +37 *** -1737 +149 () -1118 +36 *** -1718 +74 () -1117 -*** -1250 +35 *** -1658 -() -1116 -() -1114 -() -1113 -() -1112 +258 () -1111 +34 *** -1772 +151 () -1110 +33 *** -1359 +85 () -1109 +32 () -1108 +31 *** -1251 -() -1106 +94 () -1105 +30 *** -1771 -() -1104 -() -1102 -() -1101 -() -1100 +97 () -1099 +29 *** -1689 -() -1098 +90 () -1097 +28 *** -1785 +89 () -1096 +27 *** -1685 -() -1095 -() -1094 -() -1093 -() -1092 -() -1091 -() -1090 -() -1089 -() -1088 -() -1087 -() -1086 -() -1085 +92 () -1084 +26 *** -1739 -() -1083 +72 *** -1405 -() -1082 -() -1081 -() -1080 -() -1078 -() -1077 -() -1076 -() -1075 -() -1074 -() -1073 -() -1072 -() -1071 +247 () -1070 +25 *** -1707 +86 () -1069 +24 *** -1334 -() -1068 -() -1066 -() -1065 -() -1064 -() -1063 -() -1062 -() -1061 -() -1060 -() -1059 -() -1058 +82 () -1057 +23 *** -1744 -() -1056 -() -1055 -() -1054 +87 *** -1335 +117 () -1052 +22 *** -1660 -() -1051 -() -1050 -() -1049 -() -1048 -() -1047 -() -1046 -() -1045 +76 *** -1357 +119 () -1044 +21 *** -1659 -() -1043 -() -1041 -() -1040 -() -1039 -() -1038 -() -1037 -() -1036 -() -1035 -() -1034 +84 () -1033 +20 *** -1690 -() -1031 -() -1030 -() -1029 +78 () -1028 
+19 *** -1675 -() -1027 -() -1026 +73 () -1025 +18 *** -1257 -() -1024 -() -1023 -() -1022 -() -1021 -() -1020 +81 () -1019 +17 *** -1284 +65 () -1018 +16 +*** +63 +*** +101 () -1017 +15 *** -1754 +71 () -1016 +14 +*** +75 () -1015 +13 *** -1247 +322 () -1014 +12 +*** +77 () -1013 +11 +*** +283 () -1012 +10 *** -1319 +79 () -1011 +9 *** -1352 +145 *** -1651 -() -1010 +150 () -1009 +8 *** -1705 -() -1008 -() -1007 -() -1006 +67 () -1005 +7 *** -1679 -() -1004 -() -1003 -() -1002 -() -1001 -() -1000 +60 *** -1731 -() -999 -() -998 -() -996 -() -995 -() -994 -() -993 +231 () -991 +6 *** -1799 -() -990 -() -989 -() -987 -() -986 -() -985 -() -984 -() -983 +56 *** -1745 -() -982 +234 () -981 +5 *** -1644 -() -980 -() -979 -() -978 -() -977 -() -976 -() -975 -() -974 +164 *** -1222 -() -973 -() -972 -() -971 -() -970 -() -968 -() -967 -() -966 +202 () -965 +4 *** -1347 -() -964 -() -963 +53 () -962 +3 *** -1743 -() -961 +130 *** -1719 -() -960 +185 *** -1758 -() -959 +200 () -958 +2 *** -1733 -() -957 -*** -1775 -() -956 -() -955 -() -954 -() -953 -() -952 -*** -1393 -() -951 -() -950 -() -949 -*** -1669 -() -948 -() -947 -() -946 -*** -1681 -() -944 -*** -1686 -() -943 -() -942 -() -940 -*** -1783 -() -939 -() -938 -() -937 -() -936 -() -934 -() -933 -() -932 -() -931 -() -930 -() -929 -*** -1713 -() -928 -*** -1725 -() -927 -() -926 -() -925 -() -924 -() -923 -() -922 -() -921 -*** -1394 -() -920 -*** -1741 -() -919 -*** -1708 -() -918 -() -917 -() -916 -*** -1723 -() -915 -() -914 -() -913 -() -912 -() -911 -() -910 -() -909 -*** -1795 -() -908 -() -907 -() -906 -() -905 -() -904 -() -903 -*** -1330 -() -902 -() -901 -() -900 -() -899 -() -898 -() -897 -*** -1790 -() -896 -*** -1652 -() -895 -*** -1761 -() -894 -() -893 -() -892 -() -891 -() -890 -*** -1253 -() -889 -*** -1698 -() -888 -() -887 -() -885 -() -884 -*** -1703 -() -883 -() -882 -() -881 -*** -1747 -() -880 -() -879 -*** -1647 -() -878 -*** -1358 -() -877 -*** -1407 -() -876 -() -875 -() -874 -*** -1283 
-() -873 -*** -1682 -() -872 -() -871 -() -870 -() -869 -() -868 -() -867 -*** -1751 -() -866 -() -865 -() -864 -() -863 -() -862 -*** -1753 -() -861 -() -860 -() -859 -() -858 -*** -1348 -() -857 -() -856 -*** -1350 -() -855 -*** -1252 -() -854 -() -853 -*** -1201 -() -852 -() -851 -() -850 -*** -1361 -() -849 -() -848 -() -847 -() -846 -() -845 -() -844 -() -843 -() -842 -() -841 -() -840 -() -839 -*** -1360 -() -838 -() -837 -() -836 -() -835 -() -834 -() -833 -*** -1406 -() -832 -() -831 -() -830 -() -829 -() -827 -() -826 -() -825 -() -824 -() -823 -() -822 -() -821 -*** -1683 -() -820 -*** -1672 -() -819 -() -818 -*** -1693 -() -816 -() -815 -*** -1313 -() -814 -() -813 -() -812 -*** -1727 -() -811 -() -810 -() -809 -() -808 -() -806 -() -805 -*** -1217 -() -804 -() -803 -() -802 -() -801 -() -800 -() -799 -() -798 -() -797 -*** -1220 -() -796 -*** -1788 -() -795 -() -794 -*** -1255 -*** -1674 -() -793 -*** -1740 -() -792 -() -791 -*** -1349 -() -790 -() -789 -() -788 -() -787 -*** -1800 -() -786 -() -785 -() -784 -() -783 -() -782 -() -781 -() -780 -() -779 -() -778 -() -777 -() -776 -() -775 -() -774 -*** -1331 -() -773 -() -772 -*** -1256 -() -771 -() -770 -() -769 -() -768 -() -767 -() -766 -() -765 -() -764 -() -763 -() -762 -() -761 -() -759 -() -758 -*** -1655 -() -757 -() -756 -() -755 -*** -1760 -() -754 -() -753 -() -752 -() -751 -*** -1285 -*** -1680 -() -750 -*** -1261 -() -749 -() -748 -() -747 -() -746 -() -745 -*** -1362 -() -744 -() -743 -() -742 -() -741 -() -740 -() -739 -() -738 -() -737 -() -736 -*** -1729 -() -735 -*** -1769 -() -734 -() -733 -() -732 -*** -1715 -() -731 -() -730 -() -729 -() -728 -() -727 -*** -1721 -() -726 -() -725 -() -724 -() -723 -() -722 -() -721 -() -720 -() -719 -*** -1770 -() -718 -() -717 -() -716 -() -715 -() -714 -() -713 -() -712 -() -711 -*** -1779 -() -710 -*** -1221 -() -709 -() -708 -() -707 -() -706 -() -705 -*** -1661 -() -704 -() -703 -() -702 -() -701 -*** -1722 -() -700 -() -699 -() -698 -() -697 
-() -696 -() -695 -() -694 -() -693 -() -692 -*** -1776 -() -690 -*** -1254 -() -689 -*** -1738 -() -688 -() -687 -() -686 -*** -1287 -() -685 -() -684 -() -683 -() -682 -() -681 -*** -1666 -() -680 -() -679 -() -678 -() -677 -() -676 -() -675 -() -674 -*** -1695 -() -673 -*** -1709 -() -672 -() -671 -() -670 -() -669 -() -667 -() -666 -() -665 -() -664 -() -663 -() -662 -() -661 -*** -1730 -() -660 -() -659 -() -658 -() -657 -() -656 -() -655 -() -654 -() -653 -() -652 -() -651 -() -650 -() -649 -() -648 -() -647 -() -594 -610 -622 -() -588 -() -584 -601 -615 -*** -1266 -() -578 -590 -603 -() -574 -592 -607 -*** -1646 -() -568 -() -564 -582 -598 -() -558 -570 -*** -1351 -*** -1712 -() -554 -572 -() -547 -560 -580 -() -543 -562 -() -536 -549 -() -533 -551 -*** -1356 -() -527 -539 -() -524 -541 -() -518 -530 -() -514 -531 -() -508 -521 -*** -1657 -() -503 -523 -() -498 -*** -1383 -() -493 -512 -*** -1422 -() -487 -501 -() -484 -515 -*** -1354 -*** -1701 -() -481 -502 -() -475 -490 -511 -() -472 -504 -538 -566 -589 -613 -629 -() -470 -491 -*** -1303 -() -464 -() -461 -494 -526 -556 -579 -605 -623 -639 -() -450 -*** -1355 -() -438 -483 -516 -545 -569 -596 -616 -633 -() -426 -471 -506 -535 -559 -586 -608 -627 -643 -*** -1259 -() -414 -459 -495 -525 -548 -576 -599 -620 -635 -*** -1765 -() -402 -449 -500 -() -401 -446 -482 -*** -1258 -() -391 -418 -434 -455 -() -388 -435 -469 -() -384 -407 -429 -454 -() -378 -406 -447 -467 -() -376 -423 -457 -*** -1316 -() -373 -394 -416 -442 -() -367 -393 -410 -431 -452 -478 -() -366 -413 -465 -513 -550 -585 -617 -638 -() -364 -*** -1146 -*** -1750 -() -363 -411 -445 -() -359 -396 -*** -1396 -*** -1756 -() -357 -381 -405 -430 -458 -479 -*** -1353 -() -351 -368 -() -350 -389 -*** -1103 -() -349 -397 -433 -() -344 -369 -422 -443 -() -338 -354 -380 -398 -419 -441 -466 -() -335 -385 -421 -() -332 -355 -*** -1320 -() -327 -375 -428 -505 -540 -575 -609 -632 -*** -1321 -() -326 -341 -*** -1182 -() -323 -372 -409 -() -319 -342 -() -318 -331 
-343 -356 -370 -382 -395 -408 -420 -432 -444 -456 -468 -480 -492 -() -312 -*** -1161 -() -309 -346 -383 -*** -1366 -() -308 -*** -1262 -() -305 -330 -() -299 -315 -*** -1333 -*** -1676 -() -293 -317 -() -289 -296 -334 -371 -*** -1158 -() -286 -302 -329 -() -281 -303 -*** -1219 -() -280 -292 -304 -316 -*** -1264 -() -275 -290 -() -270 -291 -() -265 -278 -*** -1184 -() -260 -279 -() -255 -268 -*** -1367 -() -250 -269 -*** -1165 -() -245 -*** -1115 -() -240 -259 -*** -1067 -() -235 -248 -*** -1199 -*** -1717 -() -230 -249 -() -225 -238 -*** -1197 -() -220 -239 -() -215 -*** -935 -() -210 -229 -258 -*** -1193 -() -205 -*** -988 -() -200 -219 -() -195 -*** -1166 -*** -1667 -() -190 -209 -*** -1079 -*** -1249 -() -185 -198 -*** -1180 -() -131 -161 -192 -221 -252 -282 -320 -() -118 -151 -182 -211 -242 -271 -306 -*** -1398 -() -112 -127 -140 -*** -1148 -() -105 -141 -172 -201 -232 -261 -294 -() -103 -*** -1144 -() -92 -130 -162 -191 -222 -251 -283 -321 -358 -() -91 -*** -886 -() -80 -136 -174 -216 -254 -301 -348 -404 -473 -520 -555 -591 -619 -() -79 -117 -152 -181 -212 -241 -272 -307 -345 -*** -1267 -() -78 -116 -*** -1042 -*** -1764 -() -74 -87 -100 -114 -126 -() -73 -95 -111 -128 -149 -165 -178 -*** -997 -() -70 -119 -166 -204 -246 -285 -339 -386 -439 -485 -532 -557 -583 -606 -625 -640 -646 -() -66 -104 -142 -171 -202 -231 -262 -295 -333 -*** -1286 -() -62 -86 -108 -124 -139 -159 -175 -188 -*** -1130 -() -61 -72 -88 -113 -134 -148 -160 -179 -208 -228 -*** -1245 -() -57 -106 -157 -193 -236 -273 -328 -374 -427 -474 -519 -552 -() -56 -*** -969 -() -55 -109 -153 -197 -233 -277 -325 -377 -424 -476 -517 -553 -577 -602 -621 -637 -645 -() -54 -110 -154 -196 -234 -276 -324 -379 -425 -477 -522 -561 -595 -624 -642 -() -53 -90 -129 -*** -1190 -() -52 -*** -941 -() -50 -59 -75 -99 -121 -137 -150 -169 -*** -945 -*** -1706 -() -49 -69 -85 -101 -125 -145 -158 -170 -189 -218 -*** -992 -*** -1781 -() -48 -68 -122 -163 -207 -244 -288 -336 -390 -436 -489 -529 -() -45 -96 -143 -187 -223 -267 
-310 -360 -*** -1409 -() -41 -60 -82 -98 -115 -138 -155 -168 -180 -199 -() -39 -67 -123 -164 -206 -243 -287 -337 -392 -437 -488 -534 -571 -604 -630 -() -36 -43 -*** -1170 -() -26 -*** -1107 -() -24 -40 -*** -817 -() -20 -46 -97 -144 -186 -224 -266 -311 -365 -412 -463 -507 -542 -567 -593 -614 -631 -() -19 -33 -*** -1185 -*** -1694 -() -18 -44 -94 -146 -184 -226 -263 -314 -361 -415 -460 -509 -546 -573 -597 -618 -634 -644 -() -17 -31 -65 -102 -*** -807 -() -16 -34 -84 -133 -177 -213 -256 -298 -352 -400 -453 -496 -() -14 -37 -81 -135 -173 -217 -253 -300 -347 -403 -448 -499 -537 -563 -587 -611 -628 -641 -() -13 -22 -42 -*** -691 -() -12 -47 -93 -147 -183 -227 -264 -313 -362 -417 -462 -510 -544 -581 -612 -636 -() -11 -29 -*** -760 -() -10 -30 -63 -*** -1121 -() -9 -35 -83 -132 -176 -214 -257 -297 -353 -399 -451 -497 -*** -1304 -() -8 -25 -64 -*** -828 -() -7 -23 -51 -89 -*** -1174 -*** -1300 -() -6 -28 -71 -120 -167 -203 -247 -284 -340 -387 -440 -486 -528 -565 -600 -626 -() -5 -*** -668 -() -4 -32 -77 -*** -1032 -() -3 -15 -38 -76 -*** -1314 -() -2 -27 -*** -1053 +205 () 1 -21 -58 -107 -156 -194 -237 -274 -322 *** -1151 +39 *** -1777 +95 diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout index 272fc2ce1..51a3ec215 100644 --- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout +++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout @@ -4,19 +4,15 @@ by Andreas Loebel Copyright (c) 1998,1999 ZIB Berlin All Rights Reserved. 
-nodes : 1800 -active arcs : 8190 -simplex iterations : 6837 -flow value : 12860044181 -new implicit arcs : 300000 -active arcs : 308190 -simplex iterations : 11843 -flow value : 9360043604 -new implicit arcs : 22787 -active arcs : 330977 -simplex iterations : 11931 -flow value : 9360043512 -checksum : 798014 +nodes : 500 +active arcs : 1905 +simplex iterations : 1502 +flow value : 4990014995 +new implicit arcs : 23867 +active arcs : 25772 +simplex iterations : 2663 +flow value : 3080014995 +checksum : 68389 optimal M5 Simulator System @@ -25,9 +21,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 13:02:31 -M5 started Tue May 15 15:05:32 2007 +M5 compiled Jun 21 2007 21:15:48 +M5 started Fri Jun 22 02:01:52 2007 M5 executing on zizzer.eecs.umich.edu command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing tests/run.py long/10.mcf/sparc/linux/simple-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 2669284585000 because target called exit() +Exiting @ tick 359340764000 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini index f2617931a..f112ef506 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini @@ -21,6 +21,7 @@ SQEntries=32 SSITSize=1024 activity=0 backComSize=5 +cachePorts=200 choiceCtrBits=2 choicePredictorSize=8192 clock=500 @@ -74,6 +75,15 @@ renameToFetchDelay=1 renameToIEWDelay=2 renameToROBDelay=1 renameWidth=8 +smtCommitPolicy=RoundRobin +smtFetchPolicy=SingleThread +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtNumFetchingThreads=1 +smtROBPolicy=Partitioned +smtROBThreshold=100 squashWidth=8 system=system trapLatency=13 @@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] 
type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -261,6 +272,7 @@ opLat=3 [system.cpu.icache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt index e1bed0c51..2ac86dd84 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt @@ -1,39 +1,40 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 524 # Number of BTB hits -global.BPredUnit.BTBLookups 1590 # Number of BTB lookups +global.BPredUnit.BTBHits 522 # Number of BTB hits +global.BPredUnit.BTBLookups 1584 # Number of BTB lookups global.BPredUnit.RASInCorrect 57 # Number of incorrect RAS predictions. global.BPredUnit.condIncorrect 422 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 1093 # Number of conditional branches predicted -global.BPredUnit.lookups 1843 # Number of BP lookups +global.BPredUnit.condPredicted 1088 # Number of conditional branches predicted +global.BPredUnit.lookups 1837 # Number of BP lookups global.BPredUnit.usedRAS 241 # Number of times the RAS was used to get a target. 
-host_inst_rate 7145 # Simulator instruction rate (inst/s) -host_seconds 0.79 # Real time elapsed on the host -host_tick_rate 5828052 # Simulator tick rate (ticks/s) +host_inst_rate 39303 # Simulator instruction rate (inst/s) +host_mem_usage 153768 # Number of bytes of host memory used +host_seconds 0.14 # Real time elapsed on the host +host_tick_rate 32016268 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 17 # Number of conflicting loads. memdepunit.memDep.conflictingStores 127 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 1876 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 1144 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 1874 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1142 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5623 # Number of instructions simulated sim_seconds 0.000005 # Number of seconds simulated -sim_ticks 4588000 # Number of ticks simulated +sim_ticks 4589500 # Number of ticks simulated system.cpu.commit.COM:branches 862 # Number of branches committed system.cpu.commit.COM:bw_lim_events 104 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 8514 +system.cpu.commit.COM:committed_per_cycle.samples 8521 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 6195 7276.25% - 1 1158 1360.11% - 2 469 550.86% - 3 176 206.72% - 4 131 153.86% - 5 99 116.28% - 6 109 128.02% - 7 73 85.74% - 8 104 122.15% + 0 6200 7276.14% + 1 1160 1361.34% + 2 469 550.40% + 3 177 207.72% + 4 131 153.74% + 5 98 115.01% + 6 109 127.92% + 7 73 85.67% + 8 104 122.05% system.cpu.commit.COM:committed_per_cycle.max_value 8 
system.cpu.commit.COM:committed_per_cycle.end_dist @@ -45,27 +46,27 @@ system.cpu.commit.COM:swp_count 0 # Nu system.cpu.commit.branchMispredicts 350 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 5640 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 17 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 3588 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 3571 # The number of squashed insts skipped by commit system.cpu.committedInsts 5623 # Number of Instructions Simulated system.cpu.committedInsts_total 5623 # Number of Instructions Simulated -system.cpu.cpi 1.635604 # CPI: Cycles Per Instruction -system.cpu.cpi_total 1.635604 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1475 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 5928.571429 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 5385 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1342 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 788500 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.090169 # miss rate for ReadReq accesses +system.cpu.cpi 1.636315 # CPI: Cycles Per Instruction +system.cpu.cpi_total 1.636315 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1470 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 5932.330827 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 5380 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1337 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 789000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.090476 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 133 # number of ReadReq misses 
system.cpu.dcache.ReadReq_mshr_hits 33 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 538500 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.067797 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_latency 538000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.068027 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 100 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 4501.457726 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 4504.373178 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5116.438356 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 469 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 1544000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 1545000 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.422414 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 343 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 270 # number of WriteReq MSHR hits @@ -74,37 +75,37 @@ system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # m system.cpu.dcache.WriteReq_mshr_misses 73 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 10.468208 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 10.439306 # Average number of references to valid blocks. 
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2287 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 4900.210084 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 5271.676301 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1811 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 2332500 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.208133 # miss rate for demand accesses +system.cpu.dcache.demand_accesses 2282 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 4903.361345 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 5268.786127 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1806 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 2334000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.208589 # miss rate for demand accesses system.cpu.dcache.demand_misses 476 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 303 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 912000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.075645 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_latency 911500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.075811 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 173 # number of demand (read+write) 
MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2287 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 4900.210084 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 5271.676301 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2282 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 4903.361345 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 5268.786127 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1811 # number of overall hits -system.cpu.dcache.overall_miss_latency 2332500 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.208133 # miss rate for overall accesses +system.cpu.dcache.overall_hits 1806 # number of overall hits +system.cpu.dcache.overall_miss_latency 2334000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.208589 # miss rate for overall accesses system.cpu.dcache.overall_misses 476 # number of overall misses system.cpu.dcache.overall_mshr_hits 303 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 912000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.075645 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_latency 911500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.075811 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 173 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles 
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -120,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 173 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 112.670676 # Cycle average of tags in use -system.cpu.dcache.total_refs 1811 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 112.669258 # Cycle average of tags in use +system.cpu.dcache.total_refs 1806 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.decode.DECODE:BlockedCycles 389 # Number of cycles decode is blocked system.cpu.decode.DECODE:BranchMispred 75 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 144 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 10499 # Number of instructions handled by decode +system.cpu.decode.DECODE:BranchResolved 143 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 10466 # Number of instructions handled by decode system.cpu.decode.DECODE:IdleCycles 6230 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 1848 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 682 # Number of cycles decode is squashing +system.cpu.decode.DECODE:RunCycles 1855 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 679 # Number of cycles decode is squashing system.cpu.decode.DECODE:SquashedInsts 228 # Number of squashed instructions handled by decode system.cpu.decode.DECODE:UnblockCycles 48 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 1843 # Number 
of branches that fetch encountered -system.cpu.fetch.CacheLines 1471 # Number of cache lines fetched -system.cpu.fetch.Cycles 3451 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 269 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 11450 # Number of instructions fetch has processed +system.cpu.fetch.Branches 1837 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 1469 # Number of cache lines fetched +system.cpu.fetch.Cycles 3456 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 267 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 11417 # Number of instructions fetch has processed system.cpu.fetch.SquashCycles 455 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.200391 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 1471 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 765 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 1.244971 # Number of inst fetches per cycle +system.cpu.fetch.branchRate 0.199652 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 1469 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 763 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 1.240843 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 9197 +system.cpu.fetch.rateDist.samples 9201 system.cpu.fetch.rateDist.min_value 0 - 0 7219 7849.30% - 1 167 181.58% - 2 147 159.83% - 3 129 140.26% - 4 200 217.46% - 5 139 151.14% - 6 181 196.80% - 7 99 107.64% - 8 916 995.98% + 0 7216 7842.63% + 1 168 182.59% + 2 148 160.85% + 3 136 147.81% + 4 214 232.58% + 5 138 149.98% + 6 177 192.37% + 7 95 103.25% + 8 909 
987.94% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 1471 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5375.757576 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4524.038462 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 1141 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1774000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.224337 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 1469 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5381.818182 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4530.448718 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 1139 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1776000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.224643 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 330 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 18 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1411500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.212101 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_miss_latency 1413500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.212389 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 312 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 3.657051 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 3.650641 # Average number of references to valid blocks. 
system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 1471 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5375.757576 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4524.038462 # average overall mshr miss latency -system.cpu.icache.demand_hits 1141 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1774000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.224337 # miss rate for demand accesses +system.cpu.icache.demand_accesses 1469 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5381.818182 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4530.448718 # average overall mshr miss latency +system.cpu.icache.demand_hits 1139 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1776000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.224643 # miss rate for demand accesses system.cpu.icache.demand_misses 330 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 18 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1411500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.212101 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_latency 1413500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.212389 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 312 # number of demand (read+write) 
MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 1471 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5375.757576 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4524.038462 # average overall mshr miss latency +system.cpu.icache.overall_accesses 1469 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5381.818182 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4530.448718 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 1141 # number of overall hits -system.cpu.icache.overall_miss_latency 1774000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.224337 # miss rate for overall accesses +system.cpu.icache.overall_hits 1139 # number of overall hits +system.cpu.icache.overall_miss_latency 1776000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.224643 # miss rate for overall accesses system.cpu.icache.overall_misses 330 # number of overall misses system.cpu.icache.overall_mshr_hits 18 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1411500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.212101 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_latency 1413500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.212389 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 312 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles 
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -217,39 +218,39 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 312 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 165.938349 # Cycle average of tags in use -system.cpu.icache.total_refs 1141 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 165.921810 # Cycle average of tags in use +system.cpu.icache.total_refs 1139 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 2475 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 1148 # Number of branches executed +system.cpu.idleCycles 2474 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 1144 # Number of branches executed system.cpu.iew.EXEC:nop 40 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.837338 # Inst execution rate -system.cpu.iew.EXEC:refs 2524 # number of memory reference insts executed +system.cpu.iew.EXEC:rate 0.835018 # Inst execution rate +system.cpu.iew.EXEC:refs 2519 # number of memory reference insts executed system.cpu.iew.EXEC:stores 977 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed -system.cpu.iew.WB:consumers 5205 # num instructions consuming a value -system.cpu.iew.WB:count 7402 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.742747 # average fanout of values written-back +system.cpu.iew.WB:consumers 5193 # num instructions consuming a value +system.cpu.iew.WB:count 7387 # cumulative count of insts written-back 
+system.cpu.iew.WB:fanout 0.742923 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 3866 # num instructions producing a value -system.cpu.iew.WB:rate 0.804828 # insts written-back per cycle -system.cpu.iew.WB:sent 7467 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 374 # Number of branch mispredicts detected at execute +system.cpu.iew.WB:producers 3858 # num instructions producing a value +system.cpu.iew.WB:rate 0.802848 # insts written-back per cycle +system.cpu.iew.WB:sent 7452 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 373 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 4 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 1876 # Number of dispatched load instructions +system.cpu.iew.iewDispLoadInsts 1874 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 22 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 315 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 1144 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 9245 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 1547 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 280 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 7701 # Number of executed instructions +system.cpu.iew.iewDispSquashedInsts 302 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 1142 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 9228 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 1542 # Number of load instructions 
executed +system.cpu.iew.iewExecSquashedInsts 285 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 7683 # Number of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 682 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 679 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked @@ -259,17 +260,17 @@ system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Nu system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address system.cpu.iew.lsq.thread.0.memOrderViolation 63 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 897 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 332 # Number of stores squashed +system.cpu.iew.lsq.thread.0.squashedLoads 895 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 330 # Number of stores squashed system.cpu.iew.memOrderViolationEvents 63 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 263 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedNotTakenIncorrect 262 # Number of branches that were predicted not taken incorrectly system.cpu.iew.predictedTakenIncorrect 111 # Number of branches that were predicted taken incorrectly -system.cpu.ipc 0.611395 # IPC: Instructions Per Cycle -system.cpu.ipc_total 
0.611395 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 7981 # Type of FU issued +system.cpu.ipc 0.611129 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.611129 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 7968 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist No_OpClass 2 0.03% # Type of FU issued - IntAlu 5322 66.68% # Type of FU issued + IntAlu 5314 66.69% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.03% # Type of FU issued @@ -278,13 +279,13 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1662 20.82% # Type of FU issued - MemWrite 992 12.43% # Type of FU issued + MemRead 1659 20.82% # Type of FU issued + MemWrite 990 12.42% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 106 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.013282 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 105 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.013178 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist No_OpClass 0 0.00% # attempts to use FU when none available IntAlu 0 0.00% # attempts to use FU when none available @@ -296,41 +297,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 71 66.98% # attempts to use FU when none available - MemWrite 35 33.02% # attempts to use FU when none available + MemRead 70 66.67% # attempts to use FU when none available + MemWrite 35 33.33% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available 
InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 9197 +system.cpu.iq.ISSUE:issued_per_cycle.samples 9201 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 5952 6471.68% - 1 1107 1203.65% - 2 919 999.24% - 3 442 480.59% - 4 375 407.74% - 5 250 271.83% - 6 115 125.04% - 7 26 28.27% - 8 11 11.96% + 0 5952 6468.86% + 1 1111 1207.48% + 2 928 1008.59% + 3 433 470.60% + 4 378 410.82% + 5 251 272.80% + 6 111 120.64% + 7 27 29.34% + 8 10 10.87% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.867783 # Inst issue rate -system.cpu.iq.iqInstsAdded 9183 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 7981 # Number of instructions issued +system.cpu.iq.ISSUE:rate 0.865993 # Inst issue rate +system.cpu.iq.iqInstsAdded 9166 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 7968 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 22 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 3171 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsExamined 3154 # Number of squashed instructions iterated over during squash; mainly for profiling system.cpu.iq.iqSquashedInstsIssued 22 # Number of squashed instructions issued system.cpu.iq.iqSquashedNonSpecRemoved 5 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 2045 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.iq.iqSquashedOperandsExamined 2035 # Number of squashed operands that are examined and possibly removed from graph system.cpu.l2cache.ReadReq_accesses 483 # number of ReadReq 
accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4639.751553 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2463.768116 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 2241000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_avg_miss_latency 4644.927536 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2467.908903 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 2243500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 483 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1190000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 1192000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 483 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -342,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 # system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 483 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4639.751553 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 2463.768116 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 4644.927536 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2467.908903 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 2241000 # number of demand (read+write) miss cycles 
+system.cpu.l2cache.demand_miss_latency 2243500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 483 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 1190000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 1192000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 483 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 483 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4639.751553 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 2463.768116 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 4644.927536 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2467.908903 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 2241000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 2243500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 483 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 1190000 # number of overall MSHR miss cycles 
+system.cpu.l2cache.overall_mshr_miss_latency 1192000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 483 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -381,27 +382,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 483 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 278.222582 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 278.204751 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 9197 # number of cpu cycles simulated +system.cpu.numCycles 9201 # number of cpu cycles simulated system.cpu.rename.RENAME:BlockCycles 15 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 4051 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 6383 # Number of cycles rename is idle +system.cpu.rename.RENAME:IdleCycles 6382 # Number of cycles rename is idle system.cpu.rename.RENAME:LSQFullEvents 70 # Number of times rename has blocked due to LSQ full -system.cpu.rename.RENAME:RenameLookups 12854 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 10031 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 7485 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 1746 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 682 # Number of cycles rename is 
squashing +system.cpu.rename.RENAME:RenameLookups 12837 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 10018 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 7477 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 1754 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 679 # Number of cycles rename is squashing system.cpu.rename.RENAME:UnblockCycles 101 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 3434 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:UndoneMaps 3426 # Number of HB maps that are undone due to squashing system.cpu.rename.RENAME:serializeStallCycles 270 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 26 # count of serializing insts renamed system.cpu.rename.RENAME:skidInsts 380 # count of insts added to the skid buffer system.cpu.rename.RENAME:tempSerializingInsts 20 # count of temporary serializing insts renamed -system.cpu.timesIdled 25 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.timesIdled 26 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 17 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout index d935401d2..142cb9695 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:32 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d 
/Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing +M5 compiled Jun 21 2007 21:25:27 +M5 started Fri Jun 22 00:04:38 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 4588000 because target called exit() +Exiting @ tick 4589500 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini index e3080f9e5..36a50c983 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini @@ -21,6 +21,7 @@ SQEntries=32 SSITSize=1024 activity=0 backComSize=5 +cachePorts=200 choiceCtrBits=2 choicePredictorSize=8192 clock=500 @@ -74,6 +75,15 @@ renameToFetchDelay=1 renameToIEWDelay=2 renameToROBDelay=1 renameWidth=8 +smtCommitPolicy=RoundRobin +smtFetchPolicy=SingleThread +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtNumFetchingThreads=1 +smtROBPolicy=Partitioned +smtROBThreshold=100 squashWidth=8 system=system trapLatency=13 @@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -261,6 +272,7 @@ opLat=3 [system.cpu.icache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt 
b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt index 6dd4c291d..d400dcd22 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt @@ -8,9 +8,10 @@ global.BPredUnit.condIncorrect 208 # Nu global.BPredUnit.condPredicted 376 # Number of conditional branches predicted global.BPredUnit.lookups 738 # Number of BP lookups global.BPredUnit.usedRAS 140 # Number of times the RAS was used to get a target. -host_inst_rate 8881 # Simulator instruction rate (inst/s) -host_seconds 0.27 # Real time elapsed on the host -host_tick_rate 7632084 # Simulator tick rate (ticks/s) +host_inst_rate 39805 # Simulator instruction rate (inst/s) +host_mem_usage 153128 # Number of bytes of host memory used +host_seconds 0.06 # Real time elapsed on the host +host_tick_rate 34110715 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 8 # Number of conflicting loads. memdepunit.memDep.conflictingStores 7 # Number of conflicting stores. memdepunit.memDep.insertedLoads 608 # Number of loads inserted to the mem dependence unit. 
@@ -18,22 +19,22 @@ memdepunit.memDep.insertedStores 357 # Nu sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 2387 # Number of instructions simulated sim_seconds 0.000002 # Number of seconds simulated -sim_ticks 2053000 # Number of ticks simulated +sim_ticks 2055000 # Number of ticks simulated system.cpu.commit.COM:branches 396 # Number of branches committed system.cpu.commit.COM:bw_lim_events 41 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 3906 +system.cpu.commit.COM:committed_per_cycle.samples 3910 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 2949 7549.92% - 1 266 681.00% - 2 333 852.53% - 3 131 335.38% - 4 74 189.45% - 5 64 163.85% - 6 29 74.24% - 7 19 48.64% - 8 41 104.97% + 0 2950 7544.76% + 1 266 680.31% + 2 336 859.34% + 3 131 335.04% + 4 76 194.37% + 5 65 166.24% + 6 27 69.05% + 7 18 46.04% + 8 41 104.86% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -48,17 +49,17 @@ system.cpu.commit.commitNonSpecStalls 4 # Th system.cpu.commit.commitSquashedInsts 978 # The number of squashed insts skipped by commit system.cpu.committedInsts 2387 # Number of Instructions Simulated system.cpu.committedInsts_total 2387 # Number of Instructions Simulated -system.cpu.cpi 1.721408 # CPI: Cycles Per Instruction -system.cpu.cpi_total 1.721408 # CPI: Total CPI of All Threads +system.cpu.cpi 1.723083 # CPI: Cycles Per Instruction +system.cpu.cpi_total 1.723083 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 514 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 5456.521739 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 4737.288136 # average ReadReq mshr miss latency 
+system.cpu.dcache.ReadReq_avg_miss_latency 5391.304348 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 4669.491525 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 445 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 376500 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 372000 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.134241 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 69 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 10 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 279500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 275500 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.114786 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 59 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses) @@ -81,29 +82,29 @@ system.cpu.dcache.blocked_cycles_no_mshrs 0 # n system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 808 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 5564.285714 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 4821.428571 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 5532.142857 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 4773.809524 # average overall mshr miss latency system.cpu.dcache.demand_hits 668 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 779000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 774500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.173267 # miss 
rate for demand accesses system.cpu.dcache.demand_misses 140 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 56 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 405000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 401000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.103960 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 84 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 808 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 5564.285714 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 4821.428571 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 5532.142857 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 4773.809524 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 668 # number of overall hits -system.cpu.dcache.overall_miss_latency 779000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 774500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.173267 # miss rate for overall accesses system.cpu.dcache.overall_misses 140 # number of overall misses system.cpu.dcache.overall_mshr_hits 56 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 405000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 401000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.103960 # mshr miss rate for 
overall accesses system.cpu.dcache.overall_mshr_misses 84 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -120,7 +121,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 84 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 51.851940 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 51.873008 # Cycle average of tags in use system.cpu.dcache.total_refs 668 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks @@ -129,43 +130,43 @@ system.cpu.decode.DECODE:BranchMispred 81 # Nu system.cpu.decode.DECODE:BranchResolved 123 # Number of times decode resolved a branch system.cpu.decode.DECODE:DecodedInsts 4033 # Number of instructions handled by decode system.cpu.decode.DECODE:IdleCycles 3045 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 767 # Number of cycles decode is running +system.cpu.decode.DECODE:RunCycles 771 # Number of cycles decode is running system.cpu.decode.DECODE:SquashCycles 202 # Number of cycles decode is squashing system.cpu.decode.DECODE:SquashedInsts 298 # Number of squashed instructions handled by decode system.cpu.fetch.Branches 738 # Number of branches that fetch encountered system.cpu.fetch.CacheLines 654 # Number of cache lines fetched -system.cpu.fetch.Cycles 1440 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.Cycles 1444 # Number of cycles fetch has run and was not squashing or blocked system.cpu.fetch.IcacheSquashes 120 # Number of outstanding Icache misses that were squashed system.cpu.fetch.Insts 4685 # Number of instructions fetch has 
processed system.cpu.fetch.SquashCycles 218 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.179606 # Number of branch fetches per cycle +system.cpu.fetch.branchRate 0.179431 # Number of branch fetches per cycle system.cpu.fetch.icacheStallCycles 654 # Number of cycles fetch is stalled on an Icache miss system.cpu.fetch.predictedBranches 272 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 1.140180 # Number of inst fetches per cycle +system.cpu.fetch.rate 1.139071 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 4109 +system.cpu.fetch.rateDist.samples 4113 system.cpu.fetch.rateDist.min_value 0 - 0 3325 8091.99% - 1 32 77.88% - 2 74 180.09% - 3 53 128.99% - 4 99 240.93% - 5 49 119.25% - 6 38 92.48% - 7 35 85.18% - 8 404 983.21% + 0 3325 8084.12% + 1 32 77.80% + 2 80 194.51% + 3 50 121.57% + 4 99 240.70% + 5 52 126.43% + 6 39 94.82% + 7 35 85.10% + 8 401 974.96% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist system.cpu.icache.ReadReq_accesses 654 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5296.019900 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4553.763441 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_miss_latency 5298.507463 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4556.451613 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 453 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1064500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 1065000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.307339 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 201 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 15 # number of ReadReq MSHR hits 
-system.cpu.icache.ReadReq_mshr_miss_latency 847000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 847500 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.284404 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 186 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -177,29 +178,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 654 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5296.019900 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4553.763441 # average overall mshr miss latency +system.cpu.icache.demand_avg_miss_latency 5298.507463 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4556.451613 # average overall mshr miss latency system.cpu.icache.demand_hits 453 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1064500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 1065000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.307339 # miss rate for demand accesses system.cpu.icache.demand_misses 201 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 15 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 847000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 847500 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.284404 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 186 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number 
of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.icache.overall_accesses 654 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5296.019900 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4553.763441 # average overall mshr miss latency +system.cpu.icache.overall_avg_miss_latency 5298.507463 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4556.451613 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.icache.overall_hits 453 # number of overall hits -system.cpu.icache.overall_miss_latency 1064500 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 1065000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.307339 # miss rate for overall accesses system.cpu.icache.overall_misses 201 # number of overall misses system.cpu.icache.overall_mshr_hits 15 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 847000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 847500 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.284404 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 186 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -216,14 +217,14 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 186 # Sample count of references to valid blocks. 
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 106.237740 # Cycle average of tags in use +system.cpu.icache.tagsinuse 106.293956 # Cycle average of tags in use system.cpu.icache.total_refs 453 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idleCycles 2992 # Total number of cycles that the CPU has spent unscheduled due to idling system.cpu.iew.EXEC:branches 501 # Number of branches executed system.cpu.iew.EXEC:nop 234 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.727184 # Inst execution rate +system.cpu.iew.EXEC:rate 0.726477 # Inst execution rate system.cpu.iew.EXEC:refs 878 # number of memory reference insts executed system.cpu.iew.EXEC:stores 333 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed @@ -233,7 +234,7 @@ system.cpu.iew.WB:fanout 0.799637 # av system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:producers 1321 # num instructions producing a value -system.cpu.iew.WB:rate 0.709175 # insts written-back per cycle +system.cpu.iew.WB:rate 0.708485 # insts written-back per cycle system.cpu.iew.WB:sent 2931 # cumulative count of insts sent to commit system.cpu.iew.branchMispredicts 135 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 0 # Number of cycles IEW is blocking @@ -263,8 +264,8 @@ system.cpu.iew.lsq.thread.0.squashedStores 63 # system.cpu.iew.memOrderViolationEvents 10 # Number of memory order violations system.cpu.iew.predictedNotTakenIncorrect 98 # Number of branches that were predicted not taken incorrectly system.cpu.iew.predictedTakenIncorrect 37 # Number of branches that were predicted taken 
incorrectly -system.cpu.ipc 0.580920 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.580920 # IPC: Total IPC of All Threads +system.cpu.ipc 0.580355 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.580355 # IPC: Total IPC of All Threads system.cpu.iq.ISSUE:FU_type_0 3075 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist No_OpClass 0 0.00% # Type of FU issued @@ -301,21 +302,21 @@ system.cpu.iq.ISSUE:fu_full.start_dist InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 4109 +system.cpu.iq.ISSUE:issued_per_cycle.samples 4113 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 2849 6933.56% - 1 475 1156.00% - 2 270 657.09% - 3 217 528.11% - 4 159 386.96% - 5 86 209.30% - 6 34 82.75% - 7 13 31.64% - 8 6 14.60% + 0 2848 6924.39% + 1 479 1164.60% + 2 276 671.04% + 3 213 517.87% + 4 158 384.15% + 5 86 209.09% + 6 34 82.66% + 7 13 31.61% + 8 6 14.59% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.748357 # Inst issue rate +system.cpu.iq.ISSUE:rate 0.747629 # Inst issue rate system.cpu.iq.iqInstsAdded 3330 # Number of instructions added to the IQ (excludes non-spec) system.cpu.iq.iqInstsIssued 3075 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 7 # Number of non-speculative instructions added to the IQ @@ -323,9 +324,9 @@ system.cpu.iq.iqSquashedInstsExamined 790 # Nu system.cpu.iq.iqSquashedNonSpecRemoved 3 # Number of squashed non-spec instructions that were removed system.cpu.iq.iqSquashedOperandsExamined 409 # Number of squashed operands that are examined and possibly removed from graph system.cpu.l2cache.ReadReq_accesses 270 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4522.222222 # average ReadReq miss latency 
+system.cpu.l2cache.ReadReq_avg_miss_latency 4509.259259 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2388.888889 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 1221000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 1217500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 270 # number of ReadReq misses system.cpu.l2cache.ReadReq_mshr_miss_latency 645000 # number of ReadReq MSHR miss cycles @@ -340,10 +341,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 # system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 270 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4522.222222 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency 4509.259259 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 2388.888889 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 1221000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 1217500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 270 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits @@ -354,11 +355,11 @@ system.cpu.l2cache.fast_writes 0 # nu system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 270 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4522.222222 # average 
overall miss latency +system.cpu.l2cache.overall_avg_miss_latency 4509.259259 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 2388.888889 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 1221000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 1217500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 270 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits @@ -379,18 +380,18 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 270 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 158.236294 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 158.313436 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 4109 # number of cpu cycles simulated +system.cpu.numCycles 4113 # number of cpu cycles simulated system.cpu.rename.RENAME:CommittedMaps 1768 # Number of HB maps that are committed system.cpu.rename.RENAME:IdleCycles 3116 # Number of cycles rename is idle system.cpu.rename.RENAME:LSQFullEvents 1 # Number of times rename has blocked due to LSQ full system.cpu.rename.RENAME:RenameLookups 4416 # Number of register rename lookups that rename has made system.cpu.rename.RENAME:RenamedInsts 3886 # Number of instructions processed by rename system.cpu.rename.RENAME:RenamedOperands 2777 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 696 # Number of cycles rename is running +system.cpu.rename.RENAME:RunCycles 700 # Number of cycles rename is running system.cpu.rename.RENAME:SquashCycles 202 # Number of cycles rename is squashing system.cpu.rename.RENAME:UnblockCycles 6 # Number of cycles rename is unblocking system.cpu.rename.RENAME:UndoneMaps 1009 # Number of HB maps that are undone due to squashing diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout index 60520dc0c..c276fcaea 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:36 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing +M5 compiled Jun 21 2007 21:25:27 +M5 started Fri Jun 22 00:04:44 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing 
tests/run.py quick/00.hello/alpha/tru64/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 2053000 because target called exit() +Exiting @ tick 2055000 because target called exit() diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini index e9dddb505..f03824f95 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini @@ -21,6 +21,7 @@ SQEntries=32 SSITSize=1024 activity=0 backComSize=5 +cachePorts=200 choiceCtrBits=2 choicePredictorSize=8192 clock=500 @@ -74,6 +75,15 @@ renameToFetchDelay=1 renameToIEWDelay=2 renameToROBDelay=1 renameWidth=8 +smtCommitPolicy=RoundRobin +smtFetchPolicy=SingleThread +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtNumFetchingThreads=1 +smtROBPolicy=Partitioned +smtROBThreshold=100 squashWidth=8 system=system trapLatency=13 @@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -261,6 +272,7 @@ opLat=3 [system.cpu.icache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt index dc1fcc248..39a686d6b 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt @@ -1,47 +1,48 @@ ---------- Begin Simulation Statistics ---------- 
global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 674 # Number of BTB hits -global.BPredUnit.BTBLookups 3410 # Number of BTB lookups -global.BPredUnit.RASInCorrect 118 # Number of incorrect RAS predictions. -global.BPredUnit.condIncorrect 1115 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 2318 # Number of conditional branches predicted -global.BPredUnit.lookups 3964 # Number of BP lookups -global.BPredUnit.usedRAS 532 # Number of times the RAS was used to get a target. -host_inst_rate 8215 # Simulator instruction rate (inst/s) -host_seconds 1.37 # Real time elapsed on the host -host_tick_rate 4009351 # Simulator tick rate (ticks/s) -memdepunit.memDep.conflictingLoads 19 # Number of conflicting loads. -memdepunit.memDep.conflictingLoads 18 # Number of conflicting loads. -memdepunit.memDep.conflictingStores 54 # Number of conflicting stores. +global.BPredUnit.BTBHits 696 # Number of BTB hits +global.BPredUnit.BTBLookups 3414 # Number of BTB lookups +global.BPredUnit.RASInCorrect 125 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 1124 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 2315 # Number of conditional branches predicted +global.BPredUnit.lookups 3940 # Number of BP lookups +global.BPredUnit.usedRAS 525 # Number of times the RAS was used to get a target. +host_inst_rate 52706 # Simulator instruction rate (inst/s) +host_mem_usage 154396 # Number of bytes of host memory used +host_seconds 0.21 # Real time elapsed on the host +host_tick_rate 25698682 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 16 # Number of conflicting loads. +memdepunit.memDep.conflictingLoads 16 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 53 # Number of conflicting stores. memdepunit.memDep.conflictingStores 59 # Number of conflicting stores. 
-memdepunit.memDep.insertedLoads 1925 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedLoads 1898 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 1088 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 1934 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 1903 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1082 # Number of stores inserted to the mem dependence unit. memdepunit.memDep.insertedStores 1090 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 11247 # Number of instructions simulated sim_seconds 0.000005 # Number of seconds simulated -sim_ticks 5490000 # Number of ticks simulated +sim_ticks 5491500 # Number of ticks simulated system.cpu.commit.COM:branches 1724 # Number of branches committed system.cpu.commit.COM:branches_0 862 # Number of branches committed system.cpu.commit.COM:branches_1 862 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 165 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 168 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:bw_limited_0 0 # number of insts not committed due to BW limits system.cpu.commit.COM:bw_limited_1 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 10929 +system.cpu.commit.COM:committed_per_cycle.samples 10926 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 6410 5865.13% - 1 2019 1847.38% - 2 999 914.08% - 3 454 415.41% - 4 300 274.50% - 5 246 225.09% - 6 200 183.00% - 7 136 124.44% - 8 165 150.97% + 0 6353 5814.57% + 1 2078 1901.89% + 2 996 911.59% + 
3 472 432.00% + 4 296 270.91% + 5 241 220.57% + 6 192 175.73% + 7 130 118.98% + 8 168 153.76% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -60,133 +61,133 @@ system.cpu.commit.COM:refs_1 1791 # Nu system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed system.cpu.commit.COM:swp_count_0 0 # Number of s/w prefetches committed system.cpu.commit.COM:swp_count_1 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 874 # The number of times a branch was mispredicted +system.cpu.commit.branchMispredicts 885 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 11281 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 34 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 7769 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 7777 # The number of squashed insts skipped by commit system.cpu.committedInsts_0 5623 # Number of Instructions Simulated system.cpu.committedInsts_1 5624 # Number of Instructions Simulated system.cpu.committedInsts_total 11247 # Number of Instructions Simulated -system.cpu.cpi_0 1.952516 # CPI: Cycles Per Instruction -system.cpu.cpi_1 1.952169 # CPI: Cycles Per Instruction -system.cpu.cpi_total 0.976171 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 2969 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_accesses_0 2969 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency_0 7072.992701 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 6972.361809 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 2695 # number of ReadReq hits -system.cpu.dcache.ReadReq_hits_0 2695 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 1938000 # number of ReadReq miss cycles 
-system.cpu.dcache.ReadReq_miss_latency_0 1938000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate_0 0.092287 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 274 # number of ReadReq misses -system.cpu.dcache.ReadReq_misses_0 274 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 75 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_hits_0 75 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 1387500 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_latency_0 1387500 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.067026 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 199 # number of ReadReq MSHR misses -system.cpu.dcache.ReadReq_mshr_misses_0 199 # number of ReadReq MSHR misses +system.cpu.cpi_0 1.952872 # CPI: Cycles Per Instruction +system.cpu.cpi_1 1.952525 # CPI: Cycles Per Instruction +system.cpu.cpi_total 0.976349 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2981 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_accesses_0 2981 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency_0 7040.892193 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 6979.591837 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2712 # number of ReadReq hits +system.cpu.dcache.ReadReq_hits_0 2712 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 1894000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency_0 1894000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate_0 0.090238 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 269 # number of ReadReq misses +system.cpu.dcache.ReadReq_misses_0 269 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 73 # number of ReadReq MSHR hits 
+system.cpu.dcache.ReadReq_mshr_hits_0 73 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 1368000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency_0 1368000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.065750 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 196 # number of ReadReq MSHR misses +system.cpu.dcache.ReadReq_mshr_misses_0 196 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 1624 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_accesses_0 1624 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency_0 5352.409639 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 5859.589041 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 1126 # number of WriteReq hits -system.cpu.dcache.WriteReq_hits_0 1126 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 2665500 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_latency_0 2665500 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate_0 0.306650 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 498 # number of WriteReq misses -system.cpu.dcache.WriteReq_misses_0 498 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_hits 352 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_hits_0 352 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 855500 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_latency_0 855500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_avg_miss_latency_0 5306.613226 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 5852.739726 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 1125 # number of WriteReq hits +system.cpu.dcache.WriteReq_hits_0 
1125 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 2648000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency_0 2648000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate_0 0.307266 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 499 # number of WriteReq misses +system.cpu.dcache.WriteReq_misses_0 499 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 353 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_hits_0 353 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 854500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency_0 854500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate_0 0.089901 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 146 # number of WriteReq MSHR misses system.cpu.dcache.WriteReq_mshr_misses_0 146 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 11.075362 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 11.219298 # Average number of references to valid blocks. 
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 4593 # number of demand (read+write) accesses -system.cpu.dcache.demand_accesses_0 4593 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses 4605 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses_0 4605 # number of demand (read+write) accesses system.cpu.dcache.demand_accesses_1 0 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency # average overall miss latency -system.cpu.dcache.demand_avg_miss_latency_0 5963.082902 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency_0 5914.062500 # average overall miss latency system.cpu.dcache.demand_avg_miss_latency_1 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency_0 6501.449275 # average overall mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency_0 6498.538012 # average overall mshr miss latency system.cpu.dcache.demand_avg_mshr_miss_latency_1 # average overall mshr miss latency -system.cpu.dcache.demand_hits 3821 # number of demand (read+write) hits -system.cpu.dcache.demand_hits_0 3821 # number of demand (read+write) hits +system.cpu.dcache.demand_hits 3837 # number of demand (read+write) hits +system.cpu.dcache.demand_hits_0 3837 # number of demand (read+write) hits system.cpu.dcache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 4603500 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_latency_0 4603500 # number of 
demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 4542000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency_0 4542000 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate # miss rate for demand accesses -system.cpu.dcache.demand_miss_rate_0 0.168082 # miss rate for demand accesses +system.cpu.dcache.demand_miss_rate_0 0.166775 # miss rate for demand accesses system.cpu.dcache.demand_miss_rate_1 # miss rate for demand accesses -system.cpu.dcache.demand_misses 772 # number of demand (read+write) misses -system.cpu.dcache.demand_misses_0 772 # number of demand (read+write) misses +system.cpu.dcache.demand_misses 768 # number of demand (read+write) misses +system.cpu.dcache.demand_misses_0 768 # number of demand (read+write) misses system.cpu.dcache.demand_misses_1 0 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 427 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_hits_0 427 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits 426 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits_0 426 # number of demand (read+write) MSHR hits system.cpu.dcache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 2243000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_latency_0 2243000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 2222500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency_0 2222500 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate # mshr miss rate for demand accesses 
-system.cpu.dcache.demand_mshr_miss_rate_0 0.075114 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate_0 0.074267 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_miss_rate_1 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 345 # number of demand (read+write) MSHR misses -system.cpu.dcache.demand_mshr_misses_0 345 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_misses 342 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_misses_0 342 # number of demand (read+write) MSHR misses system.cpu.dcache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.dcache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 4593 # number of overall (read+write) accesses -system.cpu.dcache.overall_accesses_0 4593 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses 4605 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses_0 4605 # number of overall (read+write) accesses system.cpu.dcache.overall_accesses_1 0 # number of overall (read+write) accesses system.cpu.dcache.overall_avg_miss_latency # average overall miss latency -system.cpu.dcache.overall_avg_miss_latency_0 5963.082902 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency_0 5914.062500 # average overall miss latency system.cpu.dcache.overall_avg_miss_latency_1 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency_0 6501.449275 # average overall mshr miss latency 
+system.cpu.dcache.overall_avg_mshr_miss_latency_0 6498.538012 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 # average overall mshr uncacheable latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 3821 # number of overall hits -system.cpu.dcache.overall_hits_0 3821 # number of overall hits +system.cpu.dcache.overall_hits 3837 # number of overall hits +system.cpu.dcache.overall_hits_0 3837 # number of overall hits system.cpu.dcache.overall_hits_1 0 # number of overall hits -system.cpu.dcache.overall_miss_latency 4603500 # number of overall miss cycles -system.cpu.dcache.overall_miss_latency_0 4603500 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 4542000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency_0 4542000 # number of overall miss cycles system.cpu.dcache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.dcache.overall_miss_rate # miss rate for overall accesses -system.cpu.dcache.overall_miss_rate_0 0.168082 # miss rate for overall accesses +system.cpu.dcache.overall_miss_rate_0 0.166775 # miss rate for overall accesses system.cpu.dcache.overall_miss_rate_1 # miss rate for overall accesses -system.cpu.dcache.overall_misses 772 # number of overall misses -system.cpu.dcache.overall_misses_0 772 # number of overall misses +system.cpu.dcache.overall_misses 768 # number of overall misses +system.cpu.dcache.overall_misses_0 768 # number of overall misses system.cpu.dcache.overall_misses_1 0 # number of overall misses -system.cpu.dcache.overall_mshr_hits 427 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_hits_0 427 # number of overall MSHR hits 
+system.cpu.dcache.overall_mshr_hits 426 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits_0 426 # number of overall MSHR hits system.cpu.dcache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 2243000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_latency_0 2243000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 2222500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency_0 2222500 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_miss_rate_0 0.075114 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate_0 0.074267 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_miss_rate_1 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 345 # number of overall MSHR misses -system.cpu.dcache.overall_mshr_misses_0 345 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_misses 342 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_misses_0 342 # number of overall MSHR misses system.cpu.dcache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -206,149 +207,149 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.replacements_0 0 # number of replacements system.cpu.dcache.replacements_1 0 # number of replacements -system.cpu.dcache.sampled_refs 345 # Sample count of references to valid blocks. 
+system.cpu.dcache.sampled_refs 342 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 221.724795 # Cycle average of tags in use -system.cpu.dcache.total_refs 3821 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 221.287284 # Cycle average of tags in use +system.cpu.dcache.total_refs 3837 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.dcache.writebacks_0 0 # number of writebacks system.cpu.dcache.writebacks_1 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 1857 # Number of cycles decode is blocked -system.cpu.decode.DECODE:BranchMispred 251 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 346 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 21806 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 14535 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 3658 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 1498 # Number of cycles decode is squashing -system.cpu.decode.DECODE:SquashedInsts 351 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:BlockedCycles 1876 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 246 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 345 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 21769 # Number of instructions handled by decode 
+system.cpu.decode.DECODE:IdleCycles 14522 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 3673 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 1511 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 346 # Number of squashed instructions handled by decode system.cpu.decode.DECODE:UnblockCycles 145 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 3964 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 2983 # Number of cache lines fetched -system.cpu.fetch.Cycles 6940 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 525 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 24033 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 1178 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.361053 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 2983 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 1206 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 2.188997 # Number of inst fetches per cycle +system.cpu.fetch.Branches 3940 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 3009 # Number of cache lines fetched +system.cpu.fetch.Cycles 6972 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 537 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 23897 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 1189 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.358802 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 3009 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 1221 # Number of branches that fetch has predicted taken 
+system.cpu.fetch.rate 2.176213 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 10979 +system.cpu.fetch.rateDist.samples 10981 system.cpu.fetch.rateDist.min_value 0 - 0 7023 6396.76% - 1 285 259.59% - 2 224 204.03% - 3 248 225.89% - 4 335 305.13% - 5 281 255.94% - 6 301 274.16% - 7 251 228.62% - 8 2031 1849.90% + 0 7019 6391.95% + 1 293 266.82% + 2 225 204.90% + 3 260 236.77% + 4 345 314.18% + 5 288 262.27% + 6 304 276.84% + 7 246 224.02% + 8 2001 1822.24% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 2983 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_accesses_0 2983 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency_0 5910.313901 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 5152.173913 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 2314 # number of ReadReq hits -system.cpu.icache.ReadReq_hits_0 2314 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 3954000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_latency_0 3954000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate_0 0.224271 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 669 # number of ReadReq misses -system.cpu.icache.ReadReq_misses_0 669 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 48 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_hits_0 48 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 3199500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_latency_0 3199500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate_0 0.208180 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 621 # number of ReadReq MSHR misses 
-system.cpu.icache.ReadReq_mshr_misses_0 621 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_accesses 3009 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_accesses_0 3009 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency_0 5911.144578 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 5119.774920 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 2345 # number of ReadReq hits +system.cpu.icache.ReadReq_hits_0 2345 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 3925000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency_0 3925000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate_0 0.220671 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 664 # number of ReadReq misses +system.cpu.icache.ReadReq_misses_0 664 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 42 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_hits_0 42 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 3184500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency_0 3184500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate_0 0.206713 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 622 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_mshr_misses_0 622 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 3.726248 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 3.770096 # Average number of references to valid blocks. 
system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 2983 # number of demand (read+write) accesses -system.cpu.icache.demand_accesses_0 2983 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses 3009 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses_0 3009 # number of demand (read+write) accesses system.cpu.icache.demand_accesses_1 0 # number of demand (read+write) accesses system.cpu.icache.demand_avg_miss_latency # average overall miss latency -system.cpu.icache.demand_avg_miss_latency_0 5910.313901 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency_0 5911.144578 # average overall miss latency system.cpu.icache.demand_avg_miss_latency_1 # average overall miss latency system.cpu.icache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.icache.demand_avg_mshr_miss_latency_0 5152.173913 # average overall mshr miss latency +system.cpu.icache.demand_avg_mshr_miss_latency_0 5119.774920 # average overall mshr miss latency system.cpu.icache.demand_avg_mshr_miss_latency_1 # average overall mshr miss latency -system.cpu.icache.demand_hits 2314 # number of demand (read+write) hits -system.cpu.icache.demand_hits_0 2314 # number of demand (read+write) hits +system.cpu.icache.demand_hits 2345 # number of demand (read+write) hits +system.cpu.icache.demand_hits_0 2345 # number of demand (read+write) hits system.cpu.icache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 3954000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_latency_0 3954000 # number of 
demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 3925000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency_0 3925000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate # miss rate for demand accesses -system.cpu.icache.demand_miss_rate_0 0.224271 # miss rate for demand accesses +system.cpu.icache.demand_miss_rate_0 0.220671 # miss rate for demand accesses system.cpu.icache.demand_miss_rate_1 # miss rate for demand accesses -system.cpu.icache.demand_misses 669 # number of demand (read+write) misses -system.cpu.icache.demand_misses_0 669 # number of demand (read+write) misses +system.cpu.icache.demand_misses 664 # number of demand (read+write) misses +system.cpu.icache.demand_misses_0 664 # number of demand (read+write) misses system.cpu.icache.demand_misses_1 0 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 48 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_hits_0 48 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits 42 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits_0 42 # number of demand (read+write) MSHR hits system.cpu.icache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 3199500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_latency_0 3199500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 3184500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency_0 3184500 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate # mshr miss rate for demand accesses 
-system.cpu.icache.demand_mshr_miss_rate_0 0.208180 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_rate_0 0.206713 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_miss_rate_1 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 621 # number of demand (read+write) MSHR misses -system.cpu.icache.demand_mshr_misses_0 621 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses 622 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses_0 622 # number of demand (read+write) MSHR misses system.cpu.icache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.icache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 2983 # number of overall (read+write) accesses -system.cpu.icache.overall_accesses_0 2983 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses 3009 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses_0 3009 # number of overall (read+write) accesses system.cpu.icache.overall_accesses_1 0 # number of overall (read+write) accesses system.cpu.icache.overall_avg_miss_latency # average overall miss latency -system.cpu.icache.overall_avg_miss_latency_0 5910.313901 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency_0 5911.144578 # average overall miss latency system.cpu.icache.overall_avg_miss_latency_1 # average overall miss latency system.cpu.icache.overall_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.icache.overall_avg_mshr_miss_latency_0 5152.173913 # average overall mshr miss latency 
+system.cpu.icache.overall_avg_mshr_miss_latency_0 5119.774920 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 # average overall mshr uncacheable latency system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 2314 # number of overall hits -system.cpu.icache.overall_hits_0 2314 # number of overall hits +system.cpu.icache.overall_hits 2345 # number of overall hits +system.cpu.icache.overall_hits_0 2345 # number of overall hits system.cpu.icache.overall_hits_1 0 # number of overall hits -system.cpu.icache.overall_miss_latency 3954000 # number of overall miss cycles -system.cpu.icache.overall_miss_latency_0 3954000 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 3925000 # number of overall miss cycles +system.cpu.icache.overall_miss_latency_0 3925000 # number of overall miss cycles system.cpu.icache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.icache.overall_miss_rate # miss rate for overall accesses -system.cpu.icache.overall_miss_rate_0 0.224271 # miss rate for overall accesses +system.cpu.icache.overall_miss_rate_0 0.220671 # miss rate for overall accesses system.cpu.icache.overall_miss_rate_1 # miss rate for overall accesses -system.cpu.icache.overall_misses 669 # number of overall misses -system.cpu.icache.overall_misses_0 669 # number of overall misses +system.cpu.icache.overall_misses 664 # number of overall misses +system.cpu.icache.overall_misses_0 664 # number of overall misses system.cpu.icache.overall_misses_1 0 # number of overall misses -system.cpu.icache.overall_mshr_hits 48 # number of overall MSHR hits -system.cpu.icache.overall_mshr_hits_0 48 # number of overall MSHR hits 
+system.cpu.icache.overall_mshr_hits 42 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits_0 42 # number of overall MSHR hits system.cpu.icache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 3199500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_latency_0 3199500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 3184500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency_0 3184500 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_miss_rate_0 0.208180 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate_0 0.206713 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_miss_rate_1 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 621 # number of overall MSHR misses -system.cpu.icache.overall_mshr_misses_0 621 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses 622 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses_0 622 # number of overall MSHR misses system.cpu.icache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -368,104 +369,104 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 9 # number of replacements system.cpu.icache.replacements_0 9 # number of replacements system.cpu.icache.replacements_1 0 # number of replacements -system.cpu.icache.sampled_refs 621 # Sample count of references to valid blocks. 
+system.cpu.icache.sampled_refs 622 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 322.894952 # Cycle average of tags in use -system.cpu.icache.total_refs 2314 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 323.196356 # Cycle average of tags in use +system.cpu.icache.total_refs 2345 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.icache.writebacks_0 0 # number of writebacks system.cpu.icache.writebacks_1 0 # number of writebacks -system.cpu.idleCycles 1998 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 2367 # Number of branches executed -system.cpu.iew.EXEC:branches_0 1185 # Number of branches executed -system.cpu.iew.EXEC:branches_1 1182 # Number of branches executed -system.cpu.iew.EXEC:nop 73 # number of nop insts executed +system.cpu.idleCycles 2997 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 2377 # Number of branches executed +system.cpu.iew.EXEC:branches_0 1192 # Number of branches executed +system.cpu.iew.EXEC:branches_1 1185 # Number of branches executed +system.cpu.iew.EXEC:nop 72 # number of nop insts executed system.cpu.iew.EXEC:nop_0 37 # number of nop insts executed -system.cpu.iew.EXEC:nop_1 36 # number of nop insts executed -system.cpu.iew.EXEC:rate 1.416158 # Inst execution rate -system.cpu.iew.EXEC:refs 4978 # number of memory reference insts executed -system.cpu.iew.EXEC:refs_0 2514 # number of memory reference insts executed 
-system.cpu.iew.EXEC:refs_1 2464 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 1867 # Number of stores executed -system.cpu.iew.EXEC:stores_0 938 # Number of stores executed -system.cpu.iew.EXEC:stores_1 929 # Number of stores executed +system.cpu.iew.EXEC:nop_1 35 # number of nop insts executed +system.cpu.iew.EXEC:rate 1.419725 # Inst execution rate +system.cpu.iew.EXEC:refs 5002 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_0 2507 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_1 2495 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 1874 # Number of stores executed +system.cpu.iew.EXEC:stores_0 933 # Number of stores executed +system.cpu.iew.EXEC:stores_1 941 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed system.cpu.iew.EXEC:swp_0 0 # number of swp insts executed system.cpu.iew.EXEC:swp_1 0 # number of swp insts executed -system.cpu.iew.WB:consumers 10219 # num instructions consuming a value -system.cpu.iew.WB:consumers_0 5113 # num instructions consuming a value -system.cpu.iew.WB:consumers_1 5106 # num instructions consuming a value -system.cpu.iew.WB:count 14974 # cumulative count of insts written-back -system.cpu.iew.WB:count_0 7532 # cumulative count of insts written-back -system.cpu.iew.WB:count_1 7442 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 1.526960 # average fanout of values written-back -system.cpu.iew.WB:fanout_0 0.762957 # average fanout of values written-back -system.cpu.iew.WB:fanout_1 0.764003 # average fanout of values written-back +system.cpu.iew.WB:consumers 10260 # num instructions consuming a value +system.cpu.iew.WB:consumers_0 5135 # num instructions consuming a value +system.cpu.iew.WB:consumers_1 5125 # num instructions consuming a value +system.cpu.iew.WB:count 14994 # cumulative count of insts written-back +system.cpu.iew.WB:count_0 7526 # cumulative count of insts written-back 
+system.cpu.iew.WB:count_1 7468 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 1.530607 # average fanout of values written-back +system.cpu.iew.WB:fanout_0 0.763778 # average fanout of values written-back +system.cpu.iew.WB:fanout_1 0.766829 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_0 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_1 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:penalized_rate_0 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:penalized_rate_1 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 7802 # num instructions producing a value -system.cpu.iew.WB:producers_0 3901 # num instructions producing a value -system.cpu.iew.WB:producers_1 3901 # num instructions producing a value -system.cpu.iew.WB:rate 1.363876 # insts written-back per cycle -system.cpu.iew.WB:rate_0 0.686037 # insts written-back per cycle -system.cpu.iew.WB:rate_1 0.677840 # insts written-back per cycle -system.cpu.iew.WB:sent 15105 # cumulative count of insts sent to commit -system.cpu.iew.WB:sent_0 7590 # cumulative count of insts sent to commit -system.cpu.iew.WB:sent_1 7515 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 941 # Number of branch mispredicts detected at execute -system.cpu.iew.iewBlockCycles 7 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 3823 # Number of dispatched load instructions +system.cpu.iew.WB:producers 7852 # num instructions producing a value +system.cpu.iew.WB:producers_0 3922 # num instructions producing a value +system.cpu.iew.WB:producers_1 3930 # num instructions producing a value +system.cpu.iew.WB:rate 1.365449 # insts 
written-back per cycle +system.cpu.iew.WB:rate_0 0.685366 # insts written-back per cycle +system.cpu.iew.WB:rate_1 0.680084 # insts written-back per cycle +system.cpu.iew.WB:sent 15132 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_0 7582 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_1 7550 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 958 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 6 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 3837 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 42 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 501 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 2178 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 19078 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 3111 # Number of load instructions executed -system.cpu.iew.iewExecLoadInsts_0 1576 # Number of load instructions executed -system.cpu.iew.iewExecLoadInsts_1 1535 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 864 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 15548 # Number of executed instructions +system.cpu.iew.iewDispSquashedInsts 445 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 2172 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 19086 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 3128 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_0 1574 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_1 1554 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 852 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 15590 # Number 
of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 1498 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 1511 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.0.forwLoads 42 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.forwLoads 43 # Number of loads that had data forwarded from stores system.cpu.iew.lsq.thread.0.ignoredResponses 4 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 63 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 64 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 946 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 276 # Number of stores squashed +system.cpu.iew.lsq.thread.0.squashedLoads 955 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 270 # Number of stores squashed system.cpu.iew.lsq.thread.1.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.1.cacheBlocked 0 # Number of times an access to memory failed due to the 
cache being blocked -system.cpu.iew.lsq.thread.1.forwLoads 38 # Number of loads that had data forwarded from stores -system.cpu.iew.lsq.thread.1.ignoredResponses 0 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.1.forwLoads 42 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.1.ignoredResponses 2 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.1.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.1.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.1.memOrderViolation 54 # Number of memory ordering violations +system.cpu.iew.lsq.thread.1.memOrderViolation 58 # Number of memory ordering violations system.cpu.iew.lsq.thread.1.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.1.squashedLoads 919 # Number of loads squashed +system.cpu.iew.lsq.thread.1.squashedLoads 924 # Number of loads squashed system.cpu.iew.lsq.thread.1.squashedStores 278 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 117 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 761 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 180 # Number of branches that were predicted taken incorrectly -system.cpu.ipc_0 0.512160 # IPC: Instructions Per Cycle -system.cpu.ipc_1 0.512251 # IPC: Instructions Per Cycle -system.cpu.ipc_total 1.024410 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 8232 # Type of FU issued +system.cpu.iew.memOrderViolationEvents 122 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 767 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 191 # Number of branches that were predicted taken incorrectly +system.cpu.ipc_0 0.512066 # IPC: 
Instructions Per Cycle +system.cpu.ipc_1 0.512157 # IPC: Instructions Per Cycle +system.cpu.ipc_total 1.024224 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 8235 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist No_OpClass 2 0.02% # Type of FU issued - IntAlu 5551 67.43% # Type of FU issued + IntAlu 5567 67.60% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.02% # Type of FU issued @@ -474,15 +475,15 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1704 20.70% # Type of FU issued - MemWrite 972 11.81% # Type of FU issued + MemRead 1702 20.67% # Type of FU issued + MemWrite 961 11.67% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:FU_type_1 8180 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_1 8207 # Type of FU issued system.cpu.iq.ISSUE:FU_type_1.start_dist No_OpClass 2 0.02% # Type of FU issued - IntAlu 5536 67.68% # Type of FU issued + IntAlu 5547 67.59% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.02% # Type of FU issued @@ -491,15 +492,15 @@ system.cpu.iq.ISSUE:FU_type_1.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1681 20.55% # Type of FU issued - MemWrite 958 11.71% # Type of FU issued + MemRead 1690 20.59% # Type of FU issued + MemWrite 965 11.76% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_1.end_dist -system.cpu.iq.ISSUE:FU_type 16412 # Type of FU issued +system.cpu.iq.ISSUE:FU_type 16442 # Type of FU issued system.cpu.iq.ISSUE:FU_type.start_dist No_OpClass 4 0.02% # Type of FU issued - IntAlu 11087 67.55% # Type of FU 
issued + IntAlu 11114 67.60% # Type of FU issued IntMult 2 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 4 0.02% # Type of FU issued @@ -508,20 +509,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 3385 20.63% # Type of FU issued - MemWrite 1930 11.76% # Type of FU issued + MemRead 3392 20.63% # Type of FU issued + MemWrite 1926 11.71% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 180 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_cnt_0 92 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_cnt_1 88 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.010968 # FU busy rate (busy events/executed inst) -system.cpu.iq.ISSUE:fu_busy_rate_0 0.005606 # FU busy rate (busy events/executed inst) -system.cpu.iq.ISSUE:fu_busy_rate_1 0.005362 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 189 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_0 98 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_1 91 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.011495 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_0 0.005960 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_1 0.005535 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist No_OpClass 0 0.00% # attempts to use FU when none available - IntAlu 16 8.89% # attempts to use FU when none available + IntAlu 14 7.41% # attempts to use FU when none available IntMult 0 0.00% # attempts to use FU when none available IntDiv 0 0.00% # attempts to use FU when none available FloatAdd 0 0.00% # attempts to use FU when none available @@ -530,104 +531,104 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to 
use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 97 53.89% # attempts to use FU when none available - MemWrite 67 37.22% # attempts to use FU when none available + MemRead 107 56.61% # attempts to use FU when none available + MemWrite 68 35.98% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 10979 +system.cpu.iq.ISSUE:issued_per_cycle.samples 10981 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 4788 4361.05% - 1 1816 1654.07% - 2 1657 1509.24% - 3 1039 946.35% - 4 774 704.98% - 5 501 456.33% - 6 289 263.23% - 7 90 81.97% - 8 25 22.77% + 0 4775 4348.42% + 1 1817 1654.68% + 2 1638 1491.67% + 3 1107 1008.10% + 4 745 678.44% + 5 490 446.23% + 6 287 261.36% + 7 100 91.07% + 8 22 20.03% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 1.494854 # Inst issue rate -system.cpu.iq.iqInstsAdded 18963 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 16412 # Number of instructions issued +system.cpu.iq.ISSUE:rate 1.497314 # Inst issue rate +system.cpu.iq.iqInstsAdded 18972 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 16442 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 42 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 6896 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedInstsIssued 34 # Number of squashed instructions issued +system.cpu.iq.iqSquashedInstsExamined 6918 # Number of squashed instructions iterated 
over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 63 # Number of squashed instructions issued system.cpu.iq.iqSquashedNonSpecRemoved 8 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 4313 # Number of squashed operands that are examined and possibly removed from graph -system.cpu.l2cache.ReadReq_accesses 963 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_accesses_0 963 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency_0 5220.374220 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 2725.051975 # average ReadReq mshr miss latency +system.cpu.iq.iqSquashedOperandsExamined 4274 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadReq_accesses 962 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_accesses_0 962 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency_0 5208.636837 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 2724.765869 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits system.cpu.l2cache.ReadReq_hits_0 1 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 5022000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_latency_0 5022000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate_0 0.998962 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 962 # number of ReadReq misses -system.cpu.l2cache.ReadReq_misses_0 962 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 2621500 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_latency_0 2621500 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate_0 0.998962 # mshr miss rate for ReadReq accesses 
-system.cpu.l2cache.ReadReq_mshr_misses 962 # number of ReadReq MSHR misses -system.cpu.l2cache.ReadReq_mshr_misses_0 962 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_miss_latency 5005500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency_0 5005500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate_0 0.998960 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 961 # number of ReadReq misses +system.cpu.l2cache.ReadReq_misses_0 961 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 2618500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency_0 2618500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate_0 0.998960 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 961 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses_0 961 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0.001040 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.001041 # Average number of references to valid blocks. 
system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 963 # number of demand (read+write) accesses -system.cpu.l2cache.demand_accesses_0 963 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses 962 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses_0 962 # number of demand (read+write) accesses system.cpu.l2cache.demand_accesses_1 0 # number of demand (read+write) accesses system.cpu.l2cache.demand_avg_miss_latency # average overall miss latency -system.cpu.l2cache.demand_avg_miss_latency_0 5220.374220 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency_0 5208.636837 # average overall miss latency system.cpu.l2cache.demand_avg_miss_latency_1 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency_0 2725.051975 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency_0 2724.765869 # average overall mshr miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits system.cpu.l2cache.demand_hits_0 1 # number of demand (read+write) hits system.cpu.l2cache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 5022000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_latency_0 5022000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 5005500 # number of demand (read+write) miss cycles 
+system.cpu.l2cache.demand_miss_latency_0 5005500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate # miss rate for demand accesses -system.cpu.l2cache.demand_miss_rate_0 0.998962 # miss rate for demand accesses +system.cpu.l2cache.demand_miss_rate_0 0.998960 # miss rate for demand accesses system.cpu.l2cache.demand_miss_rate_1 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 962 # number of demand (read+write) misses -system.cpu.l2cache.demand_misses_0 962 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses 961 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses_0 961 # number of demand (read+write) misses system.cpu.l2cache.demand_misses_1 0 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_hits_0 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 2621500 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_latency_0 2621500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 2618500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency_0 2618500 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_miss_rate_0 0.998962 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_rate_0 0.998960 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_miss_rate_1 # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 
962 # number of demand (read+write) MSHR misses -system.cpu.l2cache.demand_mshr_misses_0 962 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses 961 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses_0 961 # number of demand (read+write) MSHR misses system.cpu.l2cache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.l2cache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 963 # number of overall (read+write) accesses -system.cpu.l2cache.overall_accesses_0 963 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses 962 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses_0 962 # number of overall (read+write) accesses system.cpu.l2cache.overall_accesses_1 0 # number of overall (read+write) accesses system.cpu.l2cache.overall_avg_miss_latency # average overall miss latency -system.cpu.l2cache.overall_avg_miss_latency_0 5220.374220 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency_0 5208.636837 # average overall miss latency system.cpu.l2cache.overall_avg_miss_latency_1 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency_0 2725.051975 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency_0 2724.765869 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable 
latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 # average overall mshr uncacheable latency @@ -635,26 +636,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 system.cpu.l2cache.overall_hits 1 # number of overall hits system.cpu.l2cache.overall_hits_0 1 # number of overall hits system.cpu.l2cache.overall_hits_1 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 5022000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_latency_0 5022000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 5005500 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency_0 5005500 # number of overall miss cycles system.cpu.l2cache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate # miss rate for overall accesses -system.cpu.l2cache.overall_miss_rate_0 0.998962 # miss rate for overall accesses +system.cpu.l2cache.overall_miss_rate_0 0.998960 # miss rate for overall accesses system.cpu.l2cache.overall_miss_rate_1 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 962 # number of overall misses -system.cpu.l2cache.overall_misses_0 962 # number of overall misses +system.cpu.l2cache.overall_misses 961 # number of overall misses +system.cpu.l2cache.overall_misses_0 961 # number of overall misses system.cpu.l2cache.overall_misses_1 0 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_hits_0 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 2621500 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_latency_0 2621500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 2618500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency_0 2618500 # number of overall MSHR miss cycles 
system.cpu.l2cache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_miss_rate_0 0.998962 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_rate_0 0.998960 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_miss_rate_1 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 962 # number of overall MSHR misses -system.cpu.l2cache.overall_mshr_misses_0 962 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses 961 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses_0 961 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -674,33 +675,33 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.replacements_0 0 # number of replacements system.cpu.l2cache.replacements_1 0 # number of replacements -system.cpu.l2cache.sampled_refs 962 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 961 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.l2cache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.l2cache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 545.133409 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 545.318204 # Cycle average of tags in use system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks. 
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.l2cache.writebacks_0 0 # number of writebacks system.cpu.l2cache.writebacks_1 0 # number of writebacks -system.cpu.numCycles 10979 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 614 # Number of cycles rename is blocking +system.cpu.numCycles 10981 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 612 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 8102 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 14840 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 684 # Number of times rename has blocked due to LSQ full -system.cpu.rename.RENAME:RenameLookups 26359 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 20748 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 15612 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 3480 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 1498 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 744 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 7510 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 517 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:IdleCycles 14828 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 692 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:RenameLookups 26356 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 20731 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 15606 # Number of destination operands rename has renamed 
+system.cpu.rename.RENAME:RunCycles 3494 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 1511 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 761 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 7504 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 521 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 48 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 2147 # count of insts added to the skid buffer +system.cpu.rename.RENAME:skidInsts 2159 # count of insts added to the skid buffer system.cpu.rename.RENAME:tempSerializingInsts 37 # count of temporary serializing insts renamed -system.cpu.timesIdled 2 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.timesIdled 3 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload0.PROG:num_syscalls 17 # Number of system calls system.cpu.workload1.PROG:num_syscalls 17 # Number of system calls diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout index 6f3d2a7c5..76288ac1d 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout @@ -7,9 +7,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:38 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing +M5 compiled Jun 21 2007 21:25:27 +M5 started Fri Jun 22 00:04:51 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d 
build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 5490000 because target called exit() +Exiting @ tick 5491500 because target called exit() diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini index 61102139c..0ef239ef4 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini @@ -21,6 +21,7 @@ SQEntries=32 SSITSize=1024 activity=0 backComSize=5 +cachePorts=200 choiceCtrBits=2 choicePredictorSize=8192 clock=500 @@ -74,6 +75,15 @@ renameToFetchDelay=1 renameToIEWDelay=2 renameToROBDelay=1 renameWidth=8 +smtCommitPolicy=RoundRobin +smtFetchPolicy=SingleThread +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtNumFetchingThreads=1 +smtROBPolicy=Partitioned +smtROBThreshold=100 squashWidth=8 system=system trapLatency=13 @@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -99,7 +110,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -261,6 +272,7 @@ opLat=3 [system.cpu.icache] type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -274,7 +286,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] 
type=BaseCache adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 compressed_bus=false @@ -312,7 +325,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -366,7 +379,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.l2cache.mem_side +port=system.physmem.port[0] system.cpu.l2cache.mem_side [system.physmem] type=PhysicalMemory diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out index 70564f749..bdf29a72a 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out @@ -275,7 +275,7 @@ prefetch_access=false prefetcher_size=100 prefetch_past_page=false prefetch_serial_squash=false -prefetch_latency=10 +prefetch_latency=10000 prefetch_degree=1 prefetch_policy=none prefetch_cache_check_push=true @@ -312,7 +312,7 @@ prefetch_access=false prefetcher_size=100 prefetch_past_page=false prefetch_serial_squash=false -prefetch_latency=10 +prefetch_latency=10000 prefetch_degree=1 prefetch_policy=none prefetch_cache_check_push=true @@ -349,7 +349,7 @@ prefetch_access=false prefetcher_size=100 prefetch_past_page=false prefetch_serial_squash=false -prefetch_latency=10 +prefetch_latency=10000 prefetch_degree=1 prefetch_policy=none prefetch_cache_check_push=true diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt index 7859d5c2b..ca9f1caa8 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt @@ -1,40 +1,40 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct 
BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 2726 # Number of BTB hits -global.BPredUnit.BTBLookups 7230 # Number of BTB lookups +global.BPredUnit.BTBHits 2589 # Number of BTB hits +global.BPredUnit.BTBLookups 6396 # Number of BTB lookups global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions. -global.BPredUnit.condIncorrect 2062 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 7954 # Number of conditional branches predicted -global.BPredUnit.lookups 7954 # Number of BP lookups +global.BPredUnit.condIncorrect 2002 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 6955 # Number of conditional branches predicted +global.BPredUnit.lookups 6955 # Number of BP lookups global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target. -host_inst_rate 37089 # Simulator instruction rate (inst/s) -host_mem_usage 154932 # Number of bytes of host memory used -host_seconds 0.30 # Real time elapsed on the host -host_tick_rate 53780846 # Simulator tick rate (ticks/s) +host_inst_rate 33806 # Simulator instruction rate (inst/s) +host_mem_usage 154936 # Number of bytes of host memory used +host_seconds 0.32 # Real time elapsed on the host +host_tick_rate 48256964 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 10 # Number of conflicting loads. memdepunit.memDep.conflictingStores 0 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 3198 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 2970 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 2999 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 2872 # Number of stores inserted to the mem dependence unit. 
sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 10976 # Number of instructions simulated sim_seconds 0.000016 # Number of seconds simulated -sim_ticks 15931500 # Number of ticks simulated +sim_ticks 15682500 # Number of ticks simulated system.cpu.commit.COM:branches 2152 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 146 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 199 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 28801 +system.cpu.commit.COM:committed_per_cycle.samples 28561 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 23411 8128.54% - 1 2862 993.72% - 2 1174 407.62% - 3 608 211.10% - 4 359 124.65% - 5 123 42.71% - 6 103 35.76% - 7 15 5.21% - 8 146 50.69% + 0 23237 8135.92% + 1 2855 999.61% + 2 1132 396.34% + 3 638 223.38% + 4 273 95.58% + 5 119 41.67% + 6 92 32.21% + 7 16 5.60% + 8 199 69.68% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -43,71 +43,71 @@ system.cpu.commit.COM:loads 1462 # Nu system.cpu.commit.COM:membars 0 # Number of memory barriers committed system.cpu.commit.COM:refs 2760 # Number of memory references committed system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 2062 # The number of times a branch was mispredicted +system.cpu.commit.branchMispredicts 2002 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 10976 # The number of committed instructions -system.cpu.commit.commitNonSpecStalls 327 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 14297 # The number of squashed insts skipped by commit 
+system.cpu.commit.commitNonSpecStalls 329 # The number of times commit has been forced to stall to communicate backwards +system.cpu.commit.commitSquashedInsts 12659 # The number of squashed insts skipped by commit system.cpu.committedInsts 10976 # Number of Instructions Simulated system.cpu.committedInsts_total 10976 # Number of Instructions Simulated -system.cpu.cpi 2.903061 # CPI: Cycles Per Instruction -system.cpu.cpi_total 2.903061 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 2743 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 5392.857143 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 4696.969697 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 2659 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 453000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.030623 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 84 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 18 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 310000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.024061 # mshr miss rate for ReadReq accesses +system.cpu.cpi 2.857598 # CPI: Cycles Per Instruction +system.cpu.cpi_total 2.857598 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2313 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 5451.807229 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 4719.696970 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2230 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 452500 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.035884 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 83 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 17 # number of 
ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 311500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.028534 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 66 # number of ReadReq MSHR misses system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses) system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 5505 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 5522.613065 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 4802.325581 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 1092 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 1101000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.154799 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 200 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_hits 114 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_hits 1093 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 1099000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.154025 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 199 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 113 # number of WriteReq MSHR hits system.cpu.dcache.WriteReq_mshr_miss_latency 413000 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.066563 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 86 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 24.717105 # Average number 
of references to valid blocks. +system.cpu.dcache.avg_refs 21.901316 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 4035 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 5471.830986 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 4756.578947 # average overall mshr miss latency -system.cpu.dcache.demand_hits 3751 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 1554000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.070384 # miss rate for demand accesses -system.cpu.dcache.demand_misses 284 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 132 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 723000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.037670 # mshr miss rate for demand accesses +system.cpu.dcache.demand_accesses 3605 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 5501.773050 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 4766.447368 # average overall mshr miss latency +system.cpu.dcache.demand_hits 3323 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 1551500 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.078225 # miss rate for demand accesses +system.cpu.dcache.demand_misses 282 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 130 # number of demand 
(read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 724500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.042164 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 152 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 4035 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 5471.830986 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 4756.578947 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 3605 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 5501.773050 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 4766.447368 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 3751 # number of overall hits -system.cpu.dcache.overall_miss_latency 1554000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.070384 # miss rate for overall accesses -system.cpu.dcache.overall_misses 284 # number of overall misses -system.cpu.dcache.overall_mshr_hits 132 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 723000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.037670 # mshr miss rate for overall accesses +system.cpu.dcache.overall_hits 3323 # number of overall hits +system.cpu.dcache.overall_miss_latency 1551500 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.078225 # miss rate for overall accesses +system.cpu.dcache.overall_misses 282 # number of overall misses 
+system.cpu.dcache.overall_mshr_hits 130 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 724500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.042164 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 152 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -123,85 +123,85 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 152 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 113.439038 # Cycle average of tags in use -system.cpu.dcache.total_refs 3757 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 113.060803 # Cycle average of tags in use +system.cpu.dcache.total_refs 3329 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 4602 # Number of cycles decode is blocked -system.cpu.decode.DECODE:DecodedInsts 38937 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 16098 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 7883 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 3063 # Number of cycles decode is squashing -system.cpu.decode.DECODE:UnblockCycles 218 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 7954 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 4933 # Number of cache lines fetched -system.cpu.fetch.Cycles 14166 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 565 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 44421 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 2121 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.249623 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 4933 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 2726 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 1.394081 # Number of inst fetches per cycle +system.cpu.decode.DECODE:BlockedCycles 3802 # Number of cycles decode is blocked +system.cpu.decode.DECODE:DecodedInsts 34098 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 15413 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 9282 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 2804 # Number of cycles decode is squashing +system.cpu.decode.DECODE:UnblockCycles 64 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 6955 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 4655 # Number of cache lines fetched 
+system.cpu.fetch.Cycles 15062 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 489 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 38520 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 2061 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.221744 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 4655 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 2589 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 1.228121 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 31864 +system.cpu.fetch.rateDist.samples 31365 system.cpu.fetch.rateDist.min_value 0 - 0 22632 7102.69% - 1 2187 686.35% - 2 562 176.37% - 3 869 272.72% - 4 521 163.51% - 5 770 241.65% - 6 886 278.06% - 7 243 76.26% - 8 3194 1002.39% + 0 20959 6682.29% + 1 4502 1435.36% + 2 577 183.96% + 3 682 217.44% + 4 776 247.41% + 5 629 200.54% + 6 581 185.24% + 7 189 60.26% + 8 2470 787.50% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 4933 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5310.666667 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4396.174863 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 4558 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1991500 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.076019 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 375 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 9 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1609000 # number of ReadReq MSHR miss cycles 
-system.cpu.icache.ReadReq_mshr_miss_rate 0.074194 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 4655 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5308.823529 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4382.513661 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 4281 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1985500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.080344 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 374 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 8 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 1604000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.078625 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 366 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 12.453552 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 11.696721 # Average number of references to valid blocks. 
system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 4933 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5310.666667 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4396.174863 # average overall mshr miss latency -system.cpu.icache.demand_hits 4558 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1991500 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.076019 # miss rate for demand accesses -system.cpu.icache.demand_misses 375 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 9 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1609000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.074194 # mshr miss rate for demand accesses +system.cpu.icache.demand_accesses 4655 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5308.823529 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4382.513661 # average overall mshr miss latency +system.cpu.icache.demand_hits 4281 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1985500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.080344 # miss rate for demand accesses +system.cpu.icache.demand_misses 374 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 8 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 1604000 # number of demand (read+write) MSHR miss cycles 
+system.cpu.icache.demand_mshr_miss_rate 0.078625 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 366 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 4933 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5310.666667 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4396.174863 # average overall mshr miss latency +system.cpu.icache.overall_accesses 4655 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5308.823529 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4382.513661 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 4558 # number of overall hits -system.cpu.icache.overall_miss_latency 1991500 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.076019 # miss rate for overall accesses -system.cpu.icache.overall_misses 375 # number of overall misses -system.cpu.icache.overall_mshr_hits 9 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1609000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.074194 # mshr miss rate for overall accesses +system.cpu.icache.overall_hits 4281 # number of overall hits +system.cpu.icache.overall_miss_latency 1985500 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.080344 # miss rate for overall accesses +system.cpu.icache.overall_misses 374 # number of overall misses +system.cpu.icache.overall_mshr_hits 8 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 1604000 # number of overall MSHR miss 
cycles +system.cpu.icache.overall_mshr_miss_rate 0.078625 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 366 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -217,59 +217,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 1 # number of replacements system.cpu.icache.sampled_refs 366 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 233.760012 # Cycle average of tags in use -system.cpu.icache.total_refs 4558 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 232.692086 # Cycle average of tags in use +system.cpu.icache.total_refs 4281 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 499 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 3548 # Number of branches executed +system.cpu.idleCycles 1997 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 3040 # Number of branches executed system.cpu.iew.EXEC:nop 0 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.670318 # Inst execution rate -system.cpu.iew.EXEC:refs 5385 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 2502 # Number of stores executed +system.cpu.iew.EXEC:rate 0.582082 # Inst execution rate +system.cpu.iew.EXEC:refs 4490 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 2077 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed -system.cpu.iew.WB:consumers 10159 # num instructions consuming a value -system.cpu.iew.WB:count 20199 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.790629 # average fanout of values written-back +system.cpu.iew.WB:consumers 8997 # num instructions consuming a value +system.cpu.iew.WB:count 17565 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.831833 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 8032 # num instructions producing a value -system.cpu.iew.WB:rate 0.633913 # insts written-back per cycle -system.cpu.iew.WB:sent 20448 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 2568 # Number of branch mispredicts detected at execute +system.cpu.iew.WB:producers 7484 # num instructions producing a value +system.cpu.iew.WB:rate 0.560019 # insts written-back per cycle +system.cpu.iew.WB:sent 17724 # cumulative count of 
insts sent to commit +system.cpu.iew.branchMispredicts 2199 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 0 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 3198 # Number of dispatched load instructions -system.cpu.iew.iewDispNonSpecInsts 610 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 2750 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 2970 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 25274 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 2883 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 1463 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 21359 # Number of executed instructions +system.cpu.iew.iewDispLoadInsts 2999 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 609 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 1287 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 2872 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 23636 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 2413 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 3118 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 18257 # Number of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 3063 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 2804 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking 
system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.0.forwLoads 48 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.forwLoads 43 # Number of loads that had data forwarded from stores system.cpu.iew.lsq.thread.0.ignoredResponses 8 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address system.cpu.iew.lsq.thread.0.memOrderViolation 52 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 1736 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 1672 # Number of stores squashed +system.cpu.iew.lsq.thread.0.squashedLoads 1537 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 1574 # Number of stores squashed system.cpu.iew.memOrderViolationEvents 52 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 958 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 1610 # Number of branches that were predicted taken incorrectly -system.cpu.ipc 0.344464 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.344464 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 22822 # Type of FU issued +system.cpu.iew.predictedNotTakenIncorrect 682 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 1517 # Number of branches that were predicted taken incorrectly +system.cpu.ipc 0.349944 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.349944 # IPC: Total 
IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 21375 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist - (null) 1826 8.00% # Type of FU issued - IntAlu 15247 66.81% # Type of FU issued + No_OpClass 1750 8.19% # Type of FU issued + IntAlu 14209 66.47% # Type of FU issued IntMult 0 0.00% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 0 0.00% # Type of FU issued @@ -278,16 +278,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 3042 13.33% # Type of FU issued - MemWrite 2707 11.86% # Type of FU issued + MemRead 2832 13.25% # Type of FU issued + MemWrite 2584 12.09% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 190 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.008325 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 160 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.007485 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist - (null) 0 0.00% # attempts to use FU when none available - IntAlu 50 26.32% # attempts to use FU when none available + No_OpClass 0 0.00% # attempts to use FU when none available + IntAlu 27 16.88% # attempts to use FU when none available IntMult 0 0.00% # attempts to use FU when none available IntDiv 0 0.00% # attempts to use FU when none available FloatAdd 0 0.00% # attempts to use FU when none available @@ -296,41 +296,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 25 13.16% # attempts to use FU when none available - MemWrite 115 60.53% # attempts to use FU when none available + MemRead 23 14.37% # attempts to 
use FU when none available + MemWrite 110 68.75% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 31864 +system.cpu.iq.ISSUE:issued_per_cycle.samples 31365 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 22879 7180.20% - 1 3824 1200.10% - 2 1304 409.24% - 3 1251 392.61% - 4 1252 392.92% - 5 751 235.69% - 6 414 129.93% - 7 122 38.29% - 8 67 21.03% + 0 21827 6959.03% + 1 4212 1342.90% + 2 2084 664.43% + 3 1568 499.92% + 4 766 244.22% + 5 454 144.75% + 6 283 90.23% + 7 109 34.75% + 8 62 19.77% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.716231 # Inst issue rate -system.cpu.iq.iqInstsAdded 24664 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 22822 # Number of instructions issued -system.cpu.iq.iqNonSpecInstsAdded 610 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 11119 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedInstsIssued 83 # Number of squashed instructions issued -system.cpu.iq.iqSquashedNonSpecRemoved 283 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 5685 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.iq.ISSUE:rate 0.681492 # Inst issue rate +system.cpu.iq.iqInstsAdded 23027 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 21375 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 609 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 10843 # Number of 
squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 99 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 280 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 7823 # Number of squashed operands that are examined and possibly removed from graph system.cpu.l2cache.ReadReq_accesses 514 # number of ReadReq accesses(hits+misses) system.cpu.l2cache.ReadReq_avg_miss_latency 4458.171206 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2373.540856 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2375.486381 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_miss_latency 2291500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 514 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1220000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 1221000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 514 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -343,13 +343,13 @@ system.cpu.l2cache.blocked_cycles_no_targets 0 system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 514 # number of demand (read+write) accesses system.cpu.l2cache.demand_avg_miss_latency 4458.171206 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 2373.540856 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2375.486381 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits 
system.cpu.l2cache.demand_miss_latency 2291500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 514 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 1220000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 1221000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 514 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed @@ -357,14 +357,14 @@ system.cpu.l2cache.mshr_cap_events 0 # nu system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 514 # number of overall (read+write) accesses system.cpu.l2cache.overall_avg_miss_latency 4458.171206 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 2373.540856 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2375.486381 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits system.cpu.l2cache.overall_miss_latency 2291500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 514 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 1220000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 1221000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses 
system.cpu.l2cache.overall_mshr_misses 514 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -381,26 +381,25 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 514 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 345.564898 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 344.125692 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 31864 # number of cpu cycles simulated +system.cpu.numCycles 31365 # number of cpu cycles simulated system.cpu.rename.RENAME:CommittedMaps 9868 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 16082 # Number of cycles rename is idle -system.cpu.rename.RENAME:RenameLookups 44650 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 29655 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 24195 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 7618 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 3063 # Number of cycles rename is squashing -system.cpu.rename.RENAME:SquashedInsts 8815 # Number of squashed instructions processed by rename -system.cpu.rename.RENAME:UnblockCycles 684 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 14327 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 3915 # count of cycles rename stalled for serializing inst 
-system.cpu.rename.RENAME:serializingInsts 631 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 4702 # count of insts added to the skid buffer -system.cpu.rename.RENAME:tempSerializingInsts 623 # count of temporary serializing insts renamed -system.cpu.timesIdled 1 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.rename.RENAME:IdleCycles 16585 # Number of cycles rename is idle +system.cpu.rename.RENAME:RenameLookups 46161 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 26550 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 21893 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 8196 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 2804 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 229 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 12025 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 3551 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 628 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 4297 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 640 # count of temporary serializing insts renamed +system.cpu.timesIdled 3 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout index 0b6e54449..692223ccd 100644 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout @@ -16,9 +16,9 @@ The Regents 
of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 13:02:31 -M5 started Tue May 15 17:00:06 2007 +M5 compiled Jun 21 2007 21:15:48 +M5 started Fri Jun 22 00:32:08 2007 M5 executing on zizzer.eecs.umich.edu command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 15931500 because target called exit() +Exiting @ tick 15682500 because target called exit() -- cgit v1.2.3