diff options
author | Brad Beckmann <Brad.Beckmann@amd.com> | 2010-01-31 22:28:13 -0800 |
---|---|---|
committer | Brad Beckmann <Brad.Beckmann@amd.com> | 2010-01-31 22:28:13 -0800 |
commit | 4e00cc9900ec4f61899ee5be0c5b3827487e91f5 (patch) | |
tree | 372809113645b16ab99adb5c4b81d7f3512780e5 | |
parent | deb97742c7ada2008ec79aaf1791f7db3c6a2b06 (diff) | |
parent | 04466ab4ca04a4e1e195a6f68423792b2553dadb (diff) | |
download | gem5-4e00cc9900ec4f61899ee5be0c5b3827487e91f5.tar.xz |
merge
45 files changed, 2640 insertions, 1380 deletions
diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py index a0b0466a7..d6db346d4 100644 --- a/src/cpu/inorder/InOrderCPU.py +++ b/src/cpu/inorder/InOrderCPU.py @@ -30,10 +30,15 @@ from m5.params import * from m5.proxy import * from BaseCPU import BaseCPU +class ThreadModel(Enum): + vals = ['Single', 'SMT', 'SwitchOnCacheMiss'] + class InOrderCPU(BaseCPU): type = 'InOrderCPU' activity = Param.Unsigned(0, "Initial count") + threadModel = Param.ThreadModel('SMT', "Multithreading model (SE-MODE only)") + cachePorts = Param.Unsigned(2, "Cache Ports") stageWidth = Param.Unsigned(1, "Stage width") diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index 82a1028c2..f222350af 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -52,12 +52,16 @@ if 'InOrderCPU' in env['CPU_MODELS']: TraceFlag('InOrderUseDef') TraceFlag('InOrderMDU') TraceFlag('InOrderGraduation') + TraceFlag('ThreadModel') TraceFlag('RefCount') + TraceFlag('AddrDep') + CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU', 'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred', 'InOrderDecode', 'InOrderExecute', 'InOrderInstBuffer', 'InOrderUseDef', - 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource']) + 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource', + 'ThreadModel']) Source('pipeline_traits.cc') Source('inorder_dyn_inst.cc') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 1e3fdc40e..7342f9bc5 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -84,25 +84,25 @@ InOrderCPU::TickEvent::description() } InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, - Fault fault, ThreadID _tid, unsigned _vpe) - : Event(CPU_Tick_Pri), cpu(_cpu) + Fault fault, ThreadID _tid, DynInstPtr inst, + unsigned event_pri_offset) + : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)), + cpu(_cpu) { - setEvent(e_type, fault, _tid, _vpe); + setEvent(e_type, fault, _tid, inst); } std::string InOrderCPU::eventNames[NumCPUEvents] = { "ActivateThread", - "DeallocateThread", + "ActivateNextReadyThread", + "DeactivateThread", + "HaltThread", "SuspendThread", - "DisableThreads", - "EnableThreads", - "DisableVPEs", - "EnableVPEs", "Trap", "InstGraduated", - "SquashAll", + "SquashFromMemStall", "UpdatePCs" }; @@ -115,28 +115,24 @@ InOrderCPU::CPUEvent::process() cpu->activateThread(tid); break; - //@TODO: Consider Implementing "Suspend Thread" as Separate from Deallocate - case SuspendThread: // Suspend & Deallocate are same for now. - //cpu->suspendThread(tid); - //break; - case DeallocateThread: - cpu->deallocateThread(tid); + case ActivateNextReadyThread: + cpu->activateNextReadyThread(); break; - case EnableVPEs: - cpu->enableVPEs(vpe); + case DeactivateThread: + cpu->deactivateThread(tid); break; - case DisableVPEs: - cpu->disableVPEs(tid, vpe); + case HaltThread: + cpu->haltThread(tid); break; - case EnableThreads: - cpu->enableThreads(vpe); + case SuspendThread: + cpu->suspendThread(tid); break; - case DisableThreads: - cpu->disableThreads(tid, vpe); + case SquashFromMemStall: + cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid); break; case Trap: @@ -144,12 +140,14 @@ InOrderCPU::CPUEvent::process() break; default: - fatal("Unrecognized Event Type %d", cpuEventType); + fatal("Unrecognized Event Type %s", eventNames[cpuEventType]); } - + cpu->cpuEventRemoveList.push(this); } + + const char * InOrderCPU::CPUEvent::description() { @@ -185,11 +183,15 @@ InOrderCPU::InOrderCPU(Params *params) system(params->system), physmem(system->physmem), #endif // FULL_SYSTEM +#ifdef DEBUG + cpuEventNum(0), + resReqCount(0), +#endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), stageTracing(params->stageTracing), - numVirtProcs(1) -{ + instsPerSwitch(0) +{ ThreadID active_threads; cpu_params = params; @@ -208,6 +210,24 @@ InOrderCPU::InOrderCPU(Params *params) "in your InOrder implementation or " "edit your workload size."); } + + + if (active_threads > 1) { + threadModel = (InOrderCPU::ThreadModel) params->threadModel; + + if (threadModel == SMT) { + DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n"); + } else if (threadModel == SwitchOnCacheMiss) { + DPRINTF(InOrderCPU, "Setting Thread Model to " + "Switch On Cache Miss\n"); + } + + } else { + threadModel = Single; + } + + + #endif // Bind the fetch & data ports from the resource pool. @@ -238,6 +258,9 @@ InOrderCPU::InOrderCPU(Params *params) Process* dummy_proc = params->workload[0]; thread[tid] = new Thread(this, tid, dummy_proc); } + + // Eventually set this with parameters... + asid[tid] = tid; #endif // Setup the TC that will serve as the interface to the threads/CPU. @@ -293,15 +316,30 @@ InOrderCPU::InOrderCPU(Params *params) memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid])); isa[tid].clear(); - isa[tid].expandForMultithreading(numThreads, numVirtProcs); + isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/); + + // Define dummy instructions and resource requests to be used. + dummyInst[tid] = new InOrderDynInst(this, + thread[tid], + 0, + tid, + asid[tid]); + + dummyReq[tid] = new ResourceRequest(resPool->getResource(0), + dummyInst[tid], + 0, + 0, + 0, + 0); } - lastRunningCycle = curTick; - contextSwitch = false; + dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyReqInst->setSquashed(); - // Define dummy instructions and resource requests to be used. - DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0); - dummyReq = new ResourceRequest(NULL, NULL, 0, 0, 0, 0); + dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyBufferInst->setSquashed(); + + lastRunningCycle = curTick; // Reset CPU to reset state. #if FULL_SYSTEM @@ -311,10 +349,17 @@ InOrderCPU::InOrderCPU(Params *params) reset(); #endif + dummyBufferInst->resetInstCount(); + // Schedule First Tick Event, CPU will reschedule itself from here on out. scheduleTickEvent(0); } +InOrderCPU::~InOrderCPU() +{ + delete resPool; +} + void InOrderCPU::regStats() @@ -322,7 +367,28 @@ InOrderCPU::regStats() /* Register the Resource Pool's stats here.*/ resPool->regStats(); +#ifdef DEBUG + maxResReqCount + .name(name() + ".maxResReqCount") + .desc("Maximum number of live resource requests in CPU") + .prereq(maxResReqCount); +#endif + + /* Register for each Pipeline Stage */ + for (int stage_num=0; stage_num < ThePipeline::NumStages; stage_num++) { + pipelineStage[stage_num]->regStats(); + } + /* Register any of the InOrderCPU's stats here.*/ + instsPerCtxtSwitch + .name(name() + ".instsPerContextSwitch") + .desc("Instructions Committed Per Context Switch") + .prereq(instsPerCtxtSwitch); + + numCtxtSwitches + .name(name() + ".contextSwitches") + .desc("Number of context switches"); + timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -331,9 +397,17 @@ InOrderCPU::regStats() idleCycles .name(name() + ".idleCycles") - .desc("Total number of cycles that the CPU has spent unscheduled due " - "to idling") - .prereq(idleCycles); + .desc("Number of cycles cpu's stages were not processed"); + + runCycles + .name(name() + ".runCycles") + .desc("Number of cycles cpu stages are processed."); + + activity + .name(name() + ".activity") + .desc("Percentage of cycles cpu is active") + .precision(6); + activity = (runCycles / numCycles) * 100; threadCycles .init(numThreads) @@ -342,7 +416,7 @@ InOrderCPU::regStats() smtCycles .name(name() + ".smtCycles") - .desc("Total number of cycles that the CPU was simultaneous multithreading.(SMT)"); + .desc("Total number of cycles that the CPU was in SMT-mode"); committedInsts .init(numThreads) @@ -405,18 +479,27 @@ InOrderCPU::tick() ++numCycles; + bool pipes_idle = true; + //Tick each of the stages for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) { pipelineStage[stNum]->tick(); + + pipes_idle = pipes_idle && pipelineStage[stNum]->idle; } + if (pipes_idle) + idleCycles++; + else + runCycles++; + // Now advance the time buffers one tick timeBuffer.advance(); for (int sqNum=0; sqNum < NumStages - 1; sqNum++) { stageQueue[sqNum]->advance(); } activityRec.advance(); - + // Any squashed requests, events, or insts then remove them now cleanUpRemovedReqs(); cleanUpRemovedEvents(); @@ -435,7 +518,8 @@ InOrderCPU::tick() //Tick next_tick = curTick + cycles(1); //tickEvent.schedule(next_tick); mainEventQueue.schedule(&tickEvent, nextCycle(curTick + 1)); - DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", nextCycle(curTick + 1)); + DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", + nextCycle(curTick + 1)); } } @@ -476,7 +560,7 @@ InOrderCPU::reset() { for (int i = 0; i < numThreads; i++) { isa[i].reset(coreType, numThreads, - numVirtProcs, dynamic_cast<BaseCPU*>(this)); + 1/*numVirtProcs*/, dynamic_cast<BaseCPU*>(this)); } } @@ -545,7 +629,7 @@ void InOrderCPU::trap(Fault fault, ThreadID tid, int delay) { //@ Squash Pipeline during TRAP - scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(Trap, fault, tid, dummyInst[tid], delay); } void @@ -554,15 +638,42 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid) fault->invoke(tcBase(tid)); } +void +InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay) +{ + scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay); +} + + +void +InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(InOrderCPU, "Squashing Pipeline Stages Due to Memory Stall...\n"); + + // Squash all instructions in each stage including + // instruction that caused the squash (seq_num - 1) + // NOTE: The stage bandwidth needs to be cleared so thats why + // the stalling instruction is squashed as well. The stalled + // instruction is previously placed in another intermediate buffer + // while it's stall is being handled. + InstSeqNum squash_seq_num = seq_num - 1; + + for (int stNum=stage_num; stNum >= 0 ; stNum--) { + pipelineStage[stNum]->squashDueToMemStall(squash_seq_num, tid); + } +} + void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, - ThreadID tid, unsigned vpe, unsigned delay) + ThreadID tid, DynInstPtr inst, + unsigned delay, unsigned event_pri_offset) { - CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe); + CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst, + event_pri_offset); if (delay >= 0) { - DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n", - eventNames[c_event], curTick + delay); + DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n", + eventNames[c_event], curTick + delay, tid); mainEventQueue.schedule(cpu_event,curTick + delay); } else { cpu_event->process(); @@ -570,12 +681,12 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, } // Broadcast event to the Resource Pool - DynInstPtr dummy_inst = - new InOrderDynInst(this, NULL, getNextEventNum(), tid); - resPool->scheduleEvent(c_event, dummy_inst, 0, 0, tid); + // Need to reset tid just in case this is a dummy instruction + inst->setTid(tid); + resPool->scheduleEvent(c_event, inst, 0, 0, tid); } -inline bool +bool InOrderCPU::isThreadActive(ThreadID tid) { list<ThreadID>::iterator isActive = @@ -584,206 +695,144 @@ InOrderCPU::isThreadActive(ThreadID tid) return (isActive != activeThreads.end()); } - -void -InOrderCPU::activateThread(ThreadID tid) +bool +InOrderCPU::isThreadReady(ThreadID tid) { - if (!isThreadActive(tid)) { - DPRINTF(InOrderCPU, - "Adding Thread %i to active threads list in CPU.\n", tid); - activeThreads.push_back(tid); + list<ThreadID>::iterator isReady = + std::find(readyThreads.begin(), readyThreads.end(), tid); - wakeCPU(); - } + return (isReady != readyThreads.end()); } -void -InOrderCPU::deactivateThread(ThreadID tid) -{ - DPRINTF(InOrderCPU, "[tid:%i]: Calling deactivate thread.\n", tid); - - if (isThreadActive(tid)) { - DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n", - tid); - list<ThreadID>::iterator thread_it = - std::find(activeThreads.begin(), activeThreads.end(), tid); - - removePipelineStalls(*thread_it); - - //@TODO: change stage status' to Idle? - - activeThreads.erase(thread_it); - } -} - -void -InOrderCPU::removePipelineStalls(ThreadID tid) -{ - DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n", - tid); - - for (int stNum = 0; stNum < NumStages ; stNum++) { - pipelineStage[stNum]->removeStalls(tid); - } - -} bool -InOrderCPU::isThreadInCPU(ThreadID tid) +InOrderCPU::isThreadSuspended(ThreadID tid) { - list<ThreadID>::iterator isCurrent = - std::find(currentThreads.begin(), currentThreads.end(), tid); + list<ThreadID>::iterator isSuspended = + std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); - return (isCurrent != currentThreads.end()); + return (isSuspended != suspendedThreads.end()); } void -InOrderCPU::addToCurrentThreads(ThreadID tid) -{ - if (!isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU.\n", - tid); - currentThreads.push_back(tid); - } +InOrderCPU::activateNextReadyThread() +{ + if (readyThreads.size() >= 1) { + ThreadID ready_tid = readyThreads.front(); + + // Activate in Pipeline + activateThread(ready_tid); + + // Activate in Resource Pool + resPool->activateAll(ready_tid); + + list<ThreadID>::iterator ready_it = + std::find(readyThreads.begin(), readyThreads.end(), ready_tid); + readyThreads.erase(ready_it); + } else { + DPRINTF(InOrderCPU, + "Attempting to activate new thread, but No Ready Threads to" + "activate.\n"); + DPRINTF(InOrderCPU, + "Unable to switch to next active thread.\n"); + } } void -InOrderCPU::removeFromCurrentThreads(ThreadID tid) +InOrderCPU::activateThread(ThreadID tid) { - if (isThreadInCPU(tid)) { + if (isThreadSuspended(tid)) { DPRINTF(InOrderCPU, - "Adding Thread %i to current threads list in CPU.\n", tid); - list<ThreadID>::iterator isCurrent = - std::find(currentThreads.begin(), currentThreads.end(), tid); - currentThreads.erase(isCurrent); - } -} - -bool -InOrderCPU::isThreadSuspended(ThreadID tid) -{ - list<ThreadID>::iterator isSuspended = - std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); + "Removing [tid:%i] from suspended threads list.\n", tid); - return (isSuspended!= suspendedThreads.end()); -} + list<ThreadID>::iterator susp_it = + std::find(suspendedThreads.begin(), suspendedThreads.end(), + tid); + suspendedThreads.erase(susp_it); + } -void -InOrderCPU::enableVirtProcElement(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling " - "Enabling of concurrent virtual processor execution", - vpe); + if (threadModel == SwitchOnCacheMiss && + numActiveThreads() == 1) { + DPRINTF(InOrderCPU, + "Ignoring activation of [tid:%i], since [tid:%i] is " + "already running.\n", tid, activeThreadId()); + + DPRINTF(InOrderCPU,"Placing [tid:%i] on ready threads list\n", + tid); + + readyThreads.push_back(tid); + + } else if (!isThreadActive(tid)) { + DPRINTF(InOrderCPU, + "Adding [tid:%i] to active threads list.\n", tid); + activeThreads.push_back(tid); + + activateThreadInPipeline(tid); - scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, vpe); -} + thread[tid]->lastActivate = curTick; -void -InOrderCPU::enableVPEs(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Concurrent Execution " - "virtual processors %i", vpe); + tcBase(tid)->setStatus(ThreadContext::Active); - list<ThreadID>::iterator thread_it = currentThreads.begin(); + wakeCPU(); - while (thread_it != currentThreads.end()) { - if (!isThreadSuspended(*thread_it)) { - activateThread(*thread_it); - } - thread_it++; + numCtxtSwitches++; } } void -InOrderCPU::disableVirtProcElement(ThreadID tid, unsigned vpe) +InOrderCPU::activateThreadInPipeline(ThreadID tid) { - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling " - "Disabling of concurrent virtual processor execution", - vpe); - - scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, vpe); + for (int stNum=0; stNum < NumStages; stNum++) { + pipelineStage[stNum]->activateThread(tid); + } } void -InOrderCPU::disableVPEs(ThreadID tid, unsigned vpe) +InOrderCPU::deactivateContext(ThreadID tid, int delay) { - DPRINTF(InOrderCPU, "[vpe:%i]: Disabling Concurrent Execution of " - "virtual processors %i", vpe); - - unsigned base_vpe = TheISA::getVirtProcNum(tcBase(tid)); + DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid); - list<ThreadID>::iterator thread_it = activeThreads.begin(); - - vector<list<ThreadID>::iterator> removeList; + scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst[tid], delay); - while (thread_it != activeThreads.end()) { - if (base_vpe != vpe) { - removeList.push_back(thread_it); - } - thread_it++; - } + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); - for (int i = 0; i < removeList.size(); i++) { - activeThreads.erase(removeList[i]); - } + _status = Running; } void -InOrderCPU::enableMultiThreading(unsigned vpe) +InOrderCPU::deactivateThread(ThreadID tid) { - // Schedule event to take place at end of cycle - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on " - "virtual processor %i", vpe); + DPRINTF(InOrderCPU, "[tid:%i]: Calling deactivate thread.\n", tid); - scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, vpe); -} + if (isThreadActive(tid)) { + DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n", + tid); + list<ThreadID>::iterator thread_it = + std::find(activeThreads.begin(), activeThreads.end(), tid); -void -InOrderCPU::enableThreads(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Multithreading on " - "virtual processor %i", vpe); + removePipelineStalls(*thread_it); - list<ThreadID>::iterator thread_it = currentThreads.begin(); + activeThreads.erase(thread_it); - while (thread_it != currentThreads.end()) { - if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) { - if (!isThreadSuspended(*thread_it)) { - activateThread(*thread_it); - } - } - thread_it++; + // Ideally, this should be triggered from the + // suspendContext/Thread functions + tcBase(tid)->setStatus(ThreadContext::Suspended); } -} -void -InOrderCPU::disableMultiThreading(ThreadID tid, unsigned vpe) -{ - // Schedule event to take place at end of cycle - DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on " - "virtual processor %i", tid, vpe); - scheduleCpuEvent(DisableThreads, NoFault, tid, vpe); + assert(!isThreadActive(tid)); } void -InOrderCPU::disableThreads(ThreadID tid, unsigned vpe) +InOrderCPU::removePipelineStalls(ThreadID tid) { - DPRINTF(InOrderCPU, "[tid:%i]: Disabling Multithreading on " - "virtual processor %i", tid, vpe); - - list<ThreadID>::iterator thread_it = activeThreads.begin(); - - vector<list<ThreadID>::iterator> removeList; + DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n", + tid); - while (thread_it != activeThreads.end()) { - if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) { - removeList.push_back(thread_it); - } - thread_it++; + for (int stNum = 0; stNum < NumStages ; stNum++) { + pipelineStage[stNum]->removeStalls(tid); } - for (int i = 0; i < removeList.size(); i++) { - activeThreads.erase(removeList[i]); - } } void @@ -825,7 +874,8 @@ InOrderCPU::activateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid); - scheduleCpuEvent(ActivateThread, NoFault, tid, 0/*vpe*/, delay); + + scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst[tid], delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -834,71 +884,73 @@ InOrderCPU::activateContext(ThreadID tid, int delay) _status = Running; } - void -InOrderCPU::suspendContext(ThreadID tid, int delay) +InOrderCPU::activateNextReadyContext(int delay) { - scheduleCpuEvent(SuspendThread, NoFault, tid, 0/*vpe*/, delay); - //_status = Idle; -} + DPRINTF(InOrderCPU,"Activating next ready thread\n"); -void -InOrderCPU::suspendThread(ThreadID tid) -{ - DPRINTF(InOrderCPU,"[tid: %i]: Suspended ...\n", tid); - deactivateThread(tid); -} + // NOTE: Add 5 to the event priority so that we always activate + // threads after we've finished deactivating, squashing,etc. + // other threads + scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], + delay, 5); -void -InOrderCPU::deallocateContext(ThreadID tid, int delay) -{ - scheduleCpuEvent(DeallocateThread, NoFault, tid, 0/*vpe*/, delay); + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + + _status = Running; } void -InOrderCPU::deallocateThread(ThreadID tid) +InOrderCPU::haltContext(ThreadID tid, int delay) { - DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...", tid); - - removeFromCurrentThreads(tid); + DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid); - deactivateThread(tid); + scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay); - squashThreadInPipeline(tid); + activityRec.activity(); } void -InOrderCPU::squashThreadInPipeline(ThreadID tid) +InOrderCPU::haltThread(ThreadID tid) { - //Squash all instructions in each stage - for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) { - pipelineStage[stNum]->squash(0 /*seq_num*/, tid); + DPRINTF(InOrderCPU, "[tid:%i]: Placing on Halted Threads List...\n", tid); + deactivateThread(tid); + squashThreadInPipeline(tid); + haltedThreads.push_back(tid); + + tcBase(tid)->setStatus(ThreadContext::Halted); + + if (threadModel == SwitchOnCacheMiss) { + activateNextReadyContext(); } } void -InOrderCPU::haltContext(ThreadID tid, int delay) +InOrderCPU::suspendContext(ThreadID tid, int delay) { - DPRINTF(InOrderCPU, "[tid:%i]: Halt context called.\n", tid); - - // Halt is same thing as deallocate for now - // @TODO: Differentiate between halt & deallocate in the CPU - // model - deallocateContext(tid, delay); + scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay); } void -InOrderCPU::insertThread(ThreadID tid) +InOrderCPU::suspendThread(ThreadID tid) { - panic("Unimplemented Function\n."); + DPRINTF(InOrderCPU, "[tid:%i]: Placing on Suspended Threads List...\n", tid); + deactivateThread(tid); + suspendedThreads.push_back(tid); + thread[tid]->lastSuspend = curTick; + + tcBase(tid)->setStatus(ThreadContext::Suspended); } void -InOrderCPU::removeThread(ThreadID tid) +InOrderCPU::squashThreadInPipeline(ThreadID tid) { - DPRINTF(InOrderCPU, "Removing Thread %i from CPU.\n", tid); - - /** Broadcast to CPU resources*/ + //Squash all instructions in each stage + for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) { + pipelineStage[stNum]->squash(0 /*seq_num*/, tid); + } } PipelineStage* @@ -907,14 +959,6 @@ InOrderCPU::getPipeStage(int stage_num) return pipelineStage[stage_num]; } - -void -InOrderCPU::activateWhenReady(ThreadID tid) -{ - panic("Unimplemented Function\n."); -} - - uint64_t InOrderCPU::readPC(ThreadID tid) { @@ -1002,9 +1046,11 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid) tid = TheISA::getTargetThread(tcBase(tid)); } - if (reg_idx < FP_Base_DepTag) { // Integer Register File + if (reg_idx < FP_Base_DepTag) { + // Integer Register File return readIntReg(reg_idx, tid); - } else if (reg_idx < Ctrl_Base_DepTag) { // Float Register File + } else if (reg_idx < Ctrl_Base_DepTag) { + // Float Register File reg_idx -= FP_Base_DepTag; return readFloatRegBits(reg_idx, tid); } else { @@ -1067,16 +1113,35 @@ InOrderCPU::addInst(DynInstPtr &inst) return --(instList[tid].end()); } +void +InOrderCPU::updateContextSwitchStats() +{ + // Set Average Stat Here, then reset to 0 + instsPerCtxtSwitch = instsPerSwitch; + instsPerSwitch = 0; +} + + void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { - // Set the CPU's PCs - This contributes to the precise state of the CPU which can be used - // when restoring a thread to the CPU after a fork or after an exception - // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if it's a branch or not + // Set the CPU's PCs - This contributes to the precise state of the CPU + // which can be used when restoring a thread to the CPU after after any + // type of context switching activity (fork, exception, etc.) setPC(inst->readPC(), tid); setNextPC(inst->readNextPC(), tid); setNextNPC(inst->readNextNPC(), tid); + if (inst->isControl()) { + thread[tid]->lastGradIsBranch = true; + thread[tid]->lastBranchPC = inst->readPC(); + thread[tid]->lastBranchNextPC = inst->readNextPC(); + thread[tid]->lastBranchNextNPC = inst->readNextNPC(); + } else { + thread[tid]->lastGradIsBranch = false; + } + + // Finalize Trace Data For Instruction if (inst->traceData) { //inst->traceData->setCycle(curTick); @@ -1087,9 +1152,9 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) inst->traceData = NULL; } - // Set Last Graduated Instruction In Thread State - //thread[tid]->lastGradInst = inst; - + // Increment active thread's instruction count + instsPerSwitch++; + // Increment thread-state's instruction count thread[tid]->numInst++; @@ -1112,18 +1177,31 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // Broadcast to other resources an instruction // has been completed - resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, tid); + resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, + 0, 0, tid); // Finally, remove instruction from CPU removeInst(inst); } +// currently unused function, but substitute repetitive code w/this function +// call void InOrderCPU::addToRemoveList(DynInstPtr &inst) { removeInstsThisCycle = true; - - removeList.push(inst->getInstListIt()); + if (!inst->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + inst->setRemoveList(); + removeList.push(inst->getInstListIt()); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + } + } void @@ -1136,7 +1214,18 @@ InOrderCPU::removeInst(DynInstPtr &inst) removeInstsThisCycle = true; // Remove the instruction. - removeList.push(inst->getInstListIt()); + if (!inst->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + inst->setRemoveList(); + removeList.push(inst->getInstListIt()); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + } + } void @@ -1150,7 +1239,7 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) inst_iter--; - DPRINTF(InOrderCPU, "Deleting instructions from CPU instruction " + DPRINTF(InOrderCPU, "Squashing instructions from CPU instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1180,8 +1269,22 @@ InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid) (*instIt)->setSquashed(); - removeList.push(instIt); + if (!(*instIt)->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), + (*instIt)->seqNum); + (*instIt)->setRemoveList(); + removeList.push(instIt); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), + (*instIt)->seqNum); + } + } + } @@ -1193,7 +1296,7 @@ InOrderCPU::cleanUpRemovedInsts() "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, - (*removeList.front())->readPC()); + (*removeList.front())->readPC()); DynInstPtr inst = *removeList.front(); ThreadID tid = inst->threadNumber; @@ -1221,11 +1324,6 @@ InOrderCPU::cleanUpRemovedInsts() instList[tid].erase(removeList.front()); removeList.pop(); - - DPRINTF(RefCount, "pop from remove list: [sn:%i]: Refcount = %i.\n", - inst->seqNum, - 0/*inst->curCount()*/); - } removeInstsThisCycle = false; @@ -1237,22 +1335,18 @@ InOrderCPU::cleanUpRemovedReqs() while (!reqRemoveList.empty()) { ResourceRequest *res_req = reqRemoveList.front(); - DPRINTF(RefCount, "[tid:%i]: Removing Request, " - "[sn:%lli] [slot:%i] [stage_num:%i] [res:%s] [refcount:%i].\n", + DPRINTF(InOrderCPU, "[tid:%i] [sn:%lli]: Removing Request " + "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n", res_req->inst->threadNumber, res_req->inst->seqNum, - res_req->getSlot(), res_req->getStageNum(), res_req->res->name(), - 0/*res_req->inst->curCount()*/); + (res_req->isCompleted()) ? res_req->getComplSlot() : res_req->getSlot(), + res_req->isCompleted()); reqRemoveList.pop(); delete res_req; - - DPRINTF(RefCount, "after remove request: [sn:%i]: Refcount = %i.\n", - res_req->inst->seqNum, - 0/*res_req->inst->curCount()*/); } } @@ -1297,8 +1391,14 @@ InOrderCPU::wakeCPU() DPRINTF(Activity, "Waking up CPU\n"); - //@todo: figure out how to count idleCycles correctly - //idleCycles += (curTick - 1) - lastRunningCycle; + Tick extra_cycles = tickToCycles((curTick - 1) - lastRunningCycle); + + idleCycles += extra_cycles; + for (int stage_num = 0; stage_num < NumStages; stage_num++) { + pipelineStage[stage_num]->idleCycles += extra_cycles; + } + + numCycles += extra_cycles; mainEventQueue.schedule(&tickEvent, curTick); } @@ -1380,7 +1480,8 @@ InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case // you want to run w/out caches? - CacheUnit *cache_res = dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx)); + CacheUnit *cache_res = + dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx)); return cache_res->read(inst, addr, data, flags); } @@ -1483,14 +1584,16 @@ InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -1498,7 +1601,8 @@ InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64 template<> Fault -InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 3320532ba..0c42f349e 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -89,17 +89,31 @@ class InOrderCPU : public BaseCPU typedef TimeBuffer<InterStageStruct> StageQueue; friend class Resource; - + public: /** Constructs a CPU with the given parameters. */ InOrderCPU(Params *params); - + /* Destructor */ + ~InOrderCPU(); + /** CPU ID */ int cpu_id; + // SE Mode ASIDs + ThreadID asid[ThePipeline::MaxThreads]; + /** Type of core that this is */ std::string coreType; + // Only need for SE MODE + enum ThreadModel { + Single, + SMT, + SwitchOnCacheMiss + }; + + ThreadModel threadModel; + int readCpuId() { return cpu_id; } void setCpuId(int val) { cpu_id = val; } @@ -117,7 +131,6 @@ class InOrderCPU : public BaseCPU /** Overall CPU status. */ Status _status; - private: /** Define TickEvent for the CPU */ class TickEvent : public Event @@ -144,9 +157,11 @@ class InOrderCPU : public BaseCPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - mainEventQueue.reschedule(&tickEvent, nextCycle(curTick + ticks(delay))); + mainEventQueue.reschedule(&tickEvent, + nextCycle(curTick + ticks(delay))); else if (!tickEvent.scheduled()) - mainEventQueue.schedule(&tickEvent, nextCycle(curTick + ticks(delay))); + mainEventQueue.schedule(&tickEvent, + nextCycle(curTick + ticks(delay))); } /** Unschedule tick event, regardless of its current state. */ @@ -165,15 +180,13 @@ class InOrderCPU : public BaseCPU // pool event. enum CPUEventType { ActivateThread, - DeallocateThread, + ActivateNextReadyThread, + DeactivateThread, + HaltThread, SuspendThread, - DisableThreads, - EnableThreads, - DisableVPEs, - EnableVPEs, Trap, InstGraduated, - SquashAll, + SquashFromMemStall, UpdatePCs, NumCPUEvents }; @@ -189,22 +202,24 @@ class InOrderCPU : public BaseCPU public: CPUEventType cpuEventType; ThreadID tid; - unsigned vpe; + DynInstPtr inst; Fault fault; - + unsigned vpe; + public: /** Constructs a CPU event. */ CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault, - ThreadID _tid, unsigned _vpe); + ThreadID _tid, DynInstPtr inst, unsigned event_pri_offset); /** Set Type of Event To Be Scheduled */ void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid, - unsigned _vpe) + DynInstPtr _inst) { fault = _fault; cpuEventType = e_type; tid = _tid; - vpe = _vpe; + inst = _inst; + vpe = 0; } /** Processes a resource event. */ @@ -222,17 +237,21 @@ class InOrderCPU : public BaseCPU /** Schedule a CPU Event */ void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid, - unsigned vpe, unsigned delay = 0); + DynInstPtr inst, unsigned delay = 0, + unsigned event_pri_offset = 0); public: /** Interface between the CPU and CPU resources. */ ResourcePool *resPool; - /** Instruction used to signify that there is no *real* instruction in buffer slot */ + /** Instruction used to signify that there is no *real* instruction in + buffer slot */ + DynInstPtr dummyInst[ThePipeline::MaxThreads]; DynInstPtr dummyBufferInst; + DynInstPtr dummyReqInst; /** Used by resources to signify a denied access to a resource. */ - ResourceRequest *dummyReq; + ResourceRequest *dummyReq[ThePipeline::MaxThreads]; /** Identifies the resource id that identifies a fetch * access unit. @@ -331,26 +350,39 @@ class InOrderCPU : public BaseCPU void trap(Fault fault, ThreadID tid, int delay = 0); void trapCPU(Fault fault, ThreadID tid); - /** Setup CPU to insert a thread's context */ - void insertThread(ThreadID tid); - - /** Remove all of a thread's context from CPU */ - void removeThread(ThreadID tid); - /** Add Thread to Active Threads List. */ void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); + void activateThreadInPipeline(ThreadID tid); + + /** Add Thread to Active Threads List. */ + void activateNextReadyContext(int delay = 0); + void activateNextReadyThread(); + + /** Remove from Active Thread List */ + void deactivateContext(ThreadID tid, int delay = 0); + void deactivateThread(ThreadID tid); - /** Remove Thread from Active Threads List */ + /** Suspend Thread, Remove from Active Threads List, Add to Suspend List */ void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); - /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. + /** Halt Thread, Remove from Active Thread List, Place Thread on Halted + * Threads List */ - void deallocateContext(ThreadID tid, int delay = 0); - void deallocateThread(ThreadID tid); - void deactivateThread(ThreadID tid); + void haltContext(ThreadID tid, int delay = 0); + void haltThread(ThreadID tid); + + /** squashFromMemStall() - sets up a squash event + * squashDueToMemStall() - squashes pipeline + * @note: maybe squashContext/squashThread would be better? + */ + void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); + void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); + + void removePipelineStalls(ThreadID tid); + void squashThreadInPipeline(ThreadID tid); + void squashBehindMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); PipelineStage* getPipeStage(int stage_num); @@ -361,37 +393,6 @@ class InOrderCPU : public BaseCPU return 0; } - /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. - */ - void haltContext(ThreadID tid, int delay = 0); - - void removePipelineStalls(ThreadID tid); - - void squashThreadInPipeline(ThreadID tid); - - /// Notify the CPU to enable a virtual processor element. - virtual void enableVirtProcElement(unsigned vpe); - void enableVPEs(unsigned vpe); - - /// Notify the CPU to disable a virtual processor element. - virtual void disableVirtProcElement(ThreadID tid, unsigned vpe); - void disableVPEs(ThreadID tid, unsigned vpe); - - /// Notify the CPU that multithreading is enabled. - virtual void enableMultiThreading(unsigned vpe); - void enableThreads(unsigned vpe); - - /// Notify the CPU that multithreading is disabled. - virtual void disableMultiThreading(ThreadID tid, unsigned vpe); - void disableThreads(ThreadID tid, unsigned vpe); - - /** Activate a Thread When CPU Resources are Available. */ - void activateWhenReady(ThreadID tid); - - /** Add or Remove a Thread Context in the CPU. */ - void doContextSwitch(); - /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); @@ -420,7 +421,11 @@ class InOrderCPU : public BaseCPU /** Get & Update Next Event Number */ InstSeqNum getNextEventNum() { +#ifdef DEBUG return cpuEventNum++; +#else + return 0; +#endif } /** Register file accessors */ @@ -550,8 +555,8 @@ class InOrderCPU : public BaseCPU */ std::queue<ListIt> removeList; - /** List of all the resource requests that will be removed at the end of this - * cycle. + /** List of all the resource requests that will be removed at the end + * of this cycle. */ std::queue<ResourceRequest*> reqRemoveList; @@ -585,18 +590,19 @@ class InOrderCPU : public BaseCPU /** Active Threads List */ std::list<ThreadID> activeThreads; - /** Current Threads List */ - std::list<ThreadID> currentThreads; + /** Ready Threads List */ + std::list<ThreadID> readyThreads; /** Suspended Threads List */ std::list<ThreadID> suspendedThreads; - /** Thread Status Functions (Unused Currently) */ - bool isThreadInCPU(ThreadID tid); + /** Halted Threads List */ + std::list<ThreadID> haltedThreads; + + /** Thread Status Functions */ bool isThreadActive(ThreadID tid); + bool isThreadReady(ThreadID tid); bool isThreadSuspended(ThreadID tid); - void addToCurrentThreads(ThreadID tid); - void removeFromCurrentThreads(ThreadID tid); private: /** The activity recorder; used to tell if the CPU has any @@ -609,6 +615,19 @@ class InOrderCPU : public BaseCPU /** Number of Active Threads in the CPU */ ThreadID numActiveThreads() { return activeThreads.size(); } + /** Thread id of active thread + * Only used for SwitchOnCacheMiss model. + * Assumes only 1 thread active + */ + ThreadID activeThreadId() + { + if (numActiveThreads() > 0) + return activeThreads.front(); + else + return InvalidThreadID; + } + + /** Records that there was time buffer activity this cycle. */ void activityThisCycle() { activityRec.activity(); } @@ -627,13 +646,14 @@ class InOrderCPU : public BaseCPU virtual void wakeup(); #endif - /** Gets a free thread id. Use if thread ids change across system. */ - ThreadID getFreeTid(); - // LL/SC debug functionality unsigned stCondFails; - unsigned readStCondFailures() { return stCondFails; } - unsigned setStCondFailures(unsigned st_fails) { return stCondFails = st_fails; } + + unsigned readStCondFailures() + { return stCondFails; } + + unsigned setStCondFailures(unsigned st_fails) + { return stCondFails = st_fails; } /** Returns a pointer to a thread context. */ ThreadContext *tcBase(ThreadID tid = 0) @@ -663,9 +683,16 @@ class InOrderCPU : public BaseCPU /** The global sequence number counter. */ InstSeqNum globalSeqNum[ThePipeline::MaxThreads]; +#ifdef DEBUG /** The global event number counter. */ InstSeqNum cpuEventNum; + /** Number of resource requests active in CPU **/ + unsigned resReqCount; + + Stats::Scalar maxResReqCount; +#endif + /** Counter of how many stages have completed switching out. */ int switchCount; @@ -684,18 +711,14 @@ class InOrderCPU : public BaseCPU /** Per-Stage Instruction Tracing */ bool stageTracing; - /** Is there a context switch pending? */ - bool contextSwitch; - - /** Threads Scheduled to Enter CPU */ - std::list<int> cpuWaitList; - /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; - /** Number of Virtual Processors the CPU can process */ - unsigned numVirtProcs; - + void updateContextSwitchStats(); + unsigned instsPerSwitch; + Stats::Average instsPerCtxtSwitch; + Stats::Scalar numCtxtSwitches; + /** Update Thread , used for statistic purposes*/ inline void tickThreadStats(); @@ -708,9 +731,15 @@ class InOrderCPU : public BaseCPU /** Stat for total number of times the CPU is descheduled. */ Stats::Scalar timesIdled; - /** Stat for total number of cycles the CPU spends descheduled. */ + /** Stat for total number of cycles the CPU spends descheduled or no stages active. */ Stats::Scalar idleCycles; + /** Stat for total number of cycles the CPU is active. */ + Stats::Scalar runCycles; + + /** Percentage of cycles a stage was active */ + Stats::Formula activity; + /** Stat for the number of committed instructions per thread. */ Stats::Vector committedInsts; diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 8bd703c56..658ce37d3 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -67,11 +67,12 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) // Clear the instruction list and skid buffer in case they have any // insts in them. - DPRINTF(InOrderStage, "Removing instructions from stage instruction list.\n"); + DPRINTF(InOrderStage, "Removing instructions from stage instruction " + "list.\n"); while (!insts[tid].empty()) { if (insts[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because it's <= " - "squashing seqNum %i.\n", + DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because " + "it's <= squashing seqNum %i.\n", tid, insts[tid].front()->seqNum, squash_seq_num); @@ -82,8 +83,9 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) insts[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, insts[tid].front()->seqNum, insts[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + "PC %08p.\n", tid, insts[tid].front()->seqNum, + insts[tid].front()->PC); insts[tid].pop(); } @@ -93,6 +95,18 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) cpu->removeInstsUntil(squash_seq_num, tid); } +void +FirstStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + // Need to preserve the stalling instruction in first-stage + // since the squash() from first stage also removes + // the instruction from the CPU (removeInstsUntil). If that + // functionality gets changed then you can move this offset. + // (stalling instruction = seq_num + 1) + squash(seq_num+1, tid); +} + + void FirstStage::processStage(bool &status_change) { @@ -104,8 +118,9 @@ FirstStage::processStage(bool &status_change) status_change = checkSignalsAndUpdate(tid) || status_change; } - for (int threadFetched = 0; threadFetched < numFetchingThreads; - threadFetched++) { + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { ThreadID tid = getFetchingThread(fetchPolicy); if (tid >= 0) { @@ -115,16 +130,28 @@ FirstStage::processStage(bool &status_change) DPRINTF(InOrderStage, "No more threads to fetch from.\n"); } } + + if (instsProcessed > 0) { + ++runCycles; + idle = false; + } else { + ++idleCycles; + idle = true; + } + } -//@TODO: Note in documentation, that when you make a pipeline stage change, then -//make sure you change the first stage too +//@TODO: Note in documentation, that when you make a pipeline stage change, +//then make sure you change the first stage too void FirstStage::processInsts(ThreadID tid) { bool all_reqs_completed = true; - for (int insts_fetched = 0; insts_fetched < stageWidth && canSendInstToStage(1); insts_fetched++) { + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { + DynInstPtr inst; bool new_inst = false; @@ -150,26 +177,21 @@ FirstStage::processInsts(ThreadID tid) inst->traceData = NULL; #endif // TRACING_ON - DPRINTF(RefCount, "creation: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Add instruction to the CPU's list of instructions. inst->setInstListIt(cpu->addInst(inst)); - DPRINTF(RefCount, "after add to CPU List: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Create Front-End Resource Schedule For Instruction ThePipeline::createFrontEndSchedule(inst); } - // Don't let instruction pass to next stage if it hasnt completed - // all of it's requests for this stage. - all_reqs_completed = processInstSchedule(inst); + int reqs_processed = 0; + all_reqs_completed = processInstSchedule(inst, reqs_processed); + + // If the instruction isnt squashed & we've completed one request + // Then we can officially count this instruction toward the stage's + // bandwidth count + if (reqs_processed > 0) + instsProcessed++; if (!all_reqs_completed) { if (new_inst) { @@ -184,7 +206,6 @@ FirstStage::processInsts(ThreadID tid) } sendInstToNextStage(inst); - //++stageProcessedInsts; } // Record that stage has written to the time buffer for activity @@ -197,11 +218,12 @@ FirstStage::processInsts(ThreadID tid) ThreadID FirstStage::getFetchingThread(FetchPriority &fetch_priority) { - if (numThreads > 1) { - switch (fetch_priority) { + ThreadID num_active_threads = cpu->numActiveThreads(); + if (num_active_threads > 1) { + switch (fetch_priority) { case SingleThread: - return 0; + return cpu->activeThreadId(); case RoundRobin: return roundRobin(); @@ -209,7 +231,7 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority) default: return InvalidThreadID; } - } else { + } else if (num_active_threads == 1) { ThreadID tid = *activeThreads->begin(); if (stageStatus[tid] == Running || @@ -218,8 +240,9 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority) } else { return InvalidThreadID; } - } - + } else { + return InvalidThreadID; + } } ThreadID diff --git a/src/cpu/inorder/first_stage.hh b/src/cpu/inorder/first_stage.hh index 2a69678e4..383b799f3 100644 --- a/src/cpu/inorder/first_stage.hh +++ b/src/cpu/inorder/first_stage.hh @@ -61,6 +61,8 @@ class FirstStage : public PipelineStage { /** Squash Instructions Above a Seq. Num */ void squash(InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); + /** There are no insts. coming from previous stages, so there is * no need to sort insts here */ diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 5ab839615..1b55c90e0 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -111,7 +111,13 @@ InOrderDynInst::initVars() { fetchMemReq = NULL; dataMemReq = NULL; - + splitMemData = NULL; + split2ndAddr = 0; + split2ndAccess = false; + splitInst = false; + splitInstSked = false; + splitFinishCnt = 0; + effAddr = 0; physEffAddr = 0; @@ -159,7 +165,7 @@ InOrderDynInst::initVars() // Update Instruction Count for this instruction ++instcount; - if (instcount > 500) { + if (instcount > 100) { fatal("Number of Active Instructions in CPU is too high. " "(Not Dereferencing Ptrs. Correctly?)\n"); } @@ -170,6 +176,12 @@ InOrderDynInst::initVars() threadNumber, seqNum, instcount); } +void +InOrderDynInst::resetInstCount() +{ + instcount = 0; +} + InOrderDynInst::~InOrderDynInst() { @@ -187,6 +199,10 @@ InOrderDynInst::~InOrderDynInst() delete traceData; } + if (splitMemData) { + delete [] splitMemData; + } + fault = NoFault; --instcount; @@ -583,30 +599,6 @@ InOrderDynInst::deallocateContext(int thread_num) this->cpu->deallocateContext(thread_num); } -void -InOrderDynInst::enableVirtProcElement(unsigned vpe) -{ - this->cpu->enableVirtProcElement(vpe); -} - -void -InOrderDynInst::disableVirtProcElement(unsigned vpe) -{ - this->cpu->disableVirtProcElement(threadNumber, vpe); -} - -void -InOrderDynInst::enableMultiThreading(unsigned vpe) -{ - this->cpu->enableMultiThreading(vpe); -} - -void -InOrderDynInst::disableMultiThreading(unsigned vpe) -{ - this->cpu->disableMultiThreading(threadNumber, vpe); -} - template<class T> inline Fault InOrderDynInst::read(Addr addr, T &data, unsigned flags) diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 522b4e8d7..8c9cd69e0 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -164,6 +164,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted /// instructions ahead of it SerializeAfter, /// Needs to serialize instructions behind it SerializeHandled, /// Serialization has been handled + RemoveList, /// Is Instruction on Remove List? NumStatus }; @@ -330,6 +331,20 @@ class InOrderDynInst : public FastAlloc, public RefCounted public: Tick memTime; + PacketDataPtr splitMemData; + RequestPtr splitMemReq; + int splitTotalSize; + int split2ndSize; + Addr split2ndAddr; + bool split2ndAccess; + uint8_t split2ndData; + PacketDataPtr split2ndDataPtr; + unsigned split2ndFlags; + bool splitInst; + int splitFinishCnt; + uint64_t *split2ndStoreDataPtr; + bool splitInstSked; + //////////////////////////////////////////////////////////// // // BASE INSTRUCTION INFORMATION. @@ -468,7 +483,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted if (!resSched.empty()) { ThePipeline::ScheduleEntry* sked = resSched.top(); resSched.pop(); - delete sked; + if (sked != 0) { + delete sked; + + } } } @@ -515,12 +533,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted //////////////////////////////////////////////////////////// virtual void deallocateContext(int thread_num); - virtual void enableVirtProcElement(unsigned vpe); - virtual void disableVirtProcElement(unsigned vpe); - - virtual void enableMultiThreading(unsigned vpe); - virtual void disableMultiThreading(unsigned vpe); - //////////////////////////////////////////////////////////// // // PROGRAM COUNTERS - PC/NPC/NPC @@ -905,6 +917,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Returns whether or not the entry is on the CPU Reg Dep Map */ bool isRegDepEntry() const { return status[RegDepMapEntry]; } + /** Sets this instruction as entered on the CPU Reg Dep Map */ + void setRemoveList() { status.set(RemoveList); } + + /** Returns whether or not the entry is on the CPU Reg Dep Map */ + bool isRemoveList() const { return status[RemoveList]; } + /** Sets this instruction as completed. */ void setCompleted() { status.set(Completed); } @@ -1022,14 +1040,15 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Count of total number of dynamic instructions. */ static int instcount; + void resetInstCount(); + /** Dumps out contents of this BaseDynInst. */ void dump(); /** Dumps out contents of this BaseDynInst into given string. */ void dump(std::string &outstring); - - //inline int curCount() { return curCount(); } + //inline int curCount() { return curCount(); } }; diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index dc0378bf3..dcf4d81bf 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -42,8 +42,11 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) : stageNum(stage_num), stageWidth(ThePipeline::StageWidth), numThreads(ThePipeline::MaxThreads), _status(Inactive), stageBufferMax(ThePipeline::interStageBuffSize[stage_num]), - prevStageValid(false), nextStageValid(false) + prevStageValid(false), nextStageValid(false), idle(false) { + switchedOutBuffer.resize(ThePipeline::MaxThreads); + switchedOutValid.resize(ThePipeline::MaxThreads); + init(params); } @@ -69,41 +72,27 @@ PipelineStage::init(Params *params) std::string PipelineStage::name() const { - return cpu->name() + ".stage-" + to_string(stageNum); + return cpu->name() + ".stage-" + to_string(stageNum); } void PipelineStage::regStats() { -/* stageIdleCycles - .name(name() + ".IdleCycles") - .desc("Number of cycles stage is idle") - .prereq(stageIdleCycles); - stageBlockedCycles - .name(name() + ".BlockedCycles") - .desc("Number of cycles stage is blocked") - .prereq(stageBlockedCycles); - stageRunCycles - .name(name() + ".RunCycles") - .desc("Number of cycles stage is running") - .prereq(stageRunCycles); - stageUnblockCycles - .name(name() + ".UnblockCycles") - .desc("Number of cycles stage is unblocking") - .prereq(stageUnblockCycles); - stageSquashCycles - .name(name() + ".SquashCycles") - .desc("Number of cycles stage is squashing") - .prereq(stageSquashCycles); - stageProcessedInsts - .name(name() + ".ProcessedInsts") - .desc("Number of instructions handled by stage") - .prereq(stageProcessedInsts); - stageSquashedInsts - .name(name() + ".SquashedInsts") - .desc("Number of squashed instructions handled by stage") - .prereq(stageSquashedInsts);*/ + idleCycles + .name(name() + ".idleCycles") + .desc("Number of cycles 0 instructions are processed."); + + runCycles + .name(name() + ".runCycles") + .desc("Number of cycles 1+ instructions are processed."); + + utilization + .name(name() + ".utilization") + .desc("Percentage of cycles stage was utilized (processing insts).") + .precision(6); + utilization = (runCycles / cpu->numCycles) * 100; + } @@ -112,8 +101,6 @@ PipelineStage::setCPU(InOrderCPU *cpu_ptr) { cpu = cpu_ptr; - dummyBufferInst = new InOrderDynInst(cpu_ptr, NULL, 0, 0, 0); - DPRINTF(InOrderStage, "Set CPU pointer.\n"); tracer = dynamic_cast<Trace::InOrderTrace *>(cpu->getTracer()); @@ -267,7 +254,8 @@ PipelineStage::isBlocked(ThreadID tid) bool PipelineStage::block(ThreadID tid) { - DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to previous stages.\n", tid); + DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to " + "previous stages.\n", tid); // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. @@ -296,7 +284,8 @@ PipelineStage::block(ThreadID tid) void PipelineStage::blockDueToBuffer(ThreadID tid) { - DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to next stage.\n", tid); + DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to " + "next stage.\n", tid); if (stageStatus[tid] != Blocked) { // Set the status to Blocked. @@ -334,8 +323,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) { if (cpu->squashSeqNum[tid] < inst->seqNum && cpu->lastSquashCycle[tid] == curTick){ - DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another stage's squash " - "signal for after [sn:%i].\n", inst->seqNum, cpu->squashSeqNum[tid]); + DPRINTF(Resource, "Ignoring [sn:%i] branch squash signal due to " + "another stage's squash signal for after [sn:%i].\n", + inst->seqNum, cpu->squashSeqNum[tid]); } else { // Send back mispredict information. toPrevStages->stageInfo[stageNum][tid].branchMispredict = true; @@ -346,20 +336,28 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) #if ISA_HAS_DELAY_SLOT - toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() != + toPrevStages->stageInfo[stageNum][tid].branchTaken = + inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); - toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum; + + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = + inst->bdelaySeqNum; + InstSeqNum squash_seq_num = inst->bdelaySeqNum; #else - toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() != + toPrevStages->stageInfo[stageNum][tid].branchTaken = + inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum; InstSeqNum squash_seq_num = inst->seqNum; #endif - DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg()); - DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] " - "branch.\n", tid, squash_seq_num, inst->seqNum); + DPRINTF(InOrderStage, "Target being re-set to %08p\n", + inst->readPredTarg()); + DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], " + "due to [sn:%i] branch.\n", tid, squash_seq_num, + inst->seqNum); // Save squash num for later stage use cpu->squashSeqNum[tid] = squash_seq_num; @@ -368,6 +366,12 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) } void +PipelineStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + squash(seq_num, tid); +} + +void PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) { DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from " @@ -376,12 +380,15 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) for (int i=0; i < prevStage->size; i++) { if (prevStage->insts[i]->threadNumber == tid && prevStage->insts[i]->seqNum > squash_seq_num) { + // Change Comment to Annulling previous instruction DPRINTF(InOrderStage, "[tid:%i]: Squashing instruction, " "[sn:%i] PC %08p.\n", tid, prevStage->insts[i]->seqNum, prevStage->insts[i]->readPC()); prevStage->insts[i]->setSquashed(); + + prevStage->insts[i] = cpu->dummyBufferInst; } } } @@ -394,18 +401,20 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) squashPrevStageInsts(squash_seq_num, tid); - DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage skidbuffer.\n", - tid); + DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage" + " skidbuffer.\n", tid); while (!skidBuffer[tid].empty()) { if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer " - "instructions before delay slot [sn:%i]. %i insts" - "left.\n", tid, squash_seq_num, + "instructions (starting w/[sn:%i]) before delay slot " + "[sn:%i]. %i insts left.\n", tid, + skidBuffer[tid].front()->seqNum, squash_seq_num, skidBuffer[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, skidBuffer[tid].front()->seqNum, skidBuffer[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + " PC %08p.\n", tid, skidBuffer[tid].front()->seqNum, + skidBuffer[tid].front()->PC); skidBuffer[tid].pop(); } @@ -427,7 +436,8 @@ PipelineStage::stageBufferAvail() int avail = stageBufferMax - total -0;// incoming_insts; if (avail < 0) - fatal("stageNum %i:stageBufferAvail() < 0...stBMax=%i,total=%i,incoming=%i=>%i", + fatal("stageNum %i:stageBufferAvail() < 0..." + "stBMax=%i,total=%i,incoming=%i=>%i", stageNum, stageBufferMax, total, incoming_insts, avail); return avail; @@ -443,7 +453,8 @@ PipelineStage::canSendInstToStage(unsigned stage_num) } if (!buffer_avail && nextStageQueueValid(stage_num)) { - DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", stageNum + 1); + DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", + stageNum + 1); } return buffer_avail; @@ -461,8 +472,9 @@ PipelineStage::skidInsert(ThreadID tid) assert(tid == inst->threadNumber); - DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", - tid, inst->seqNum, inst->readPC(), inst->threadNumber); + DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage " + "skidBuffer %i\n", tid, inst->seqNum, inst->readPC(), + inst->threadNumber); skidBuffer[tid].push(inst); } @@ -533,6 +545,39 @@ PipelineStage::updateStatus() } } +void +PipelineStage::activateThread(ThreadID tid) +{ + if (cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + if (!switchedOutValid[tid]) { + DPRINTF(InOrderStage, "[tid:%i] No instruction available in " + "switch out buffer.\n", tid); + } else { + DynInstPtr inst = switchedOutBuffer[tid]; + + DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into " + "stage skidBuffer %i\n", tid, inst->seqNum, + inst->readPC(), inst->threadNumber); + + // Make instruction available for pipeline processing + skidBuffer[tid].push(inst); + + // Update PC so that we start fetching after this instruction to prevent + // "double"-execution of instructions + cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType) + ResourcePool::UpdateAfterContextSwitch, + inst, 0, 0, tid); + + // Clear switchout buffer + switchedOutBuffer[tid] = NULL; + switchedOutValid[tid] = false; + + // Update any CPU stats based off context switches + cpu->updateContextSwitchStats(); + } + } + +} void @@ -547,16 +592,16 @@ PipelineStage::sortInsts() for (int i = 0; i < insts_from_prev_stage; ++i) { if (prevStage->insts[i]->isSquashed()) { - DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], not inserting " - "into stage buffer.\n", + DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], " + "not inserting into stage buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); continue; } - DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage buffer.\n", - prevStage->insts[i]->readTid(), + DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage " + "buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); ThreadID tid = prevStage->insts[i]->threadNumber; @@ -565,7 +610,7 @@ PipelineStage::sortInsts() skidBuffer[tid].push(prevStage->insts[i]); - prevStage->insts[i] = dummyBufferInst; + prevStage->insts[i] = cpu->dummyBufferInst; } } @@ -611,8 +656,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) // Check for squash from later pipeline stages for (int stage_idx=stageNum; stage_idx < NumStages; stage_idx++) { if (fromNextStages->stageInfo[stage_idx][tid].squash) { - DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to squash " - "from stage %u.\n", tid, stage_idx); + DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to " + "squash from stage %u.\n", tid, stage_idx); InstSeqNum squash_seq_num = fromNextStages-> stageInfo[stage_idx][tid].bdelayDoneSeqNum; squash(squash_seq_num, tid); @@ -625,8 +670,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) } if (stageStatus[tid] == Blocked) { - DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to unblocking.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to " + "unblocking.\n", tid); stageStatus[tid] = Unblocking; @@ -637,15 +682,15 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) if (stageStatus[tid] == Squashing) { if (!skidBuffer[tid].empty()) { - DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to unblocking.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to " + "unblocking.\n", tid); stageStatus[tid] = Unblocking; } else { // Switch status to running if stage isn't being told to block or // squash this cycle. - DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to running.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to " + "running.\n", tid); stageStatus[tid] = Running; } @@ -663,6 +708,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) void PipelineStage::tick() { + idle = false; + wroteToTimeBuffer = false; bool status_change = false; @@ -671,9 +718,11 @@ PipelineStage::tick() nextStage->size = 0; toNextStageIndex = 0; - + sortInsts(); + instsProcessed = 0; + processStage(status_change); if (status_change) { @@ -717,13 +766,13 @@ PipelineStage::unsetResStall(ResReqPtr res_req, ThreadID tid) } if (stalls[tid].resources.size() == 0) { - DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource stalls.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource" + "stalls.\n", tid); } } -// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be handled -// one at a time, but instead first come first serve by instruction? +// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be +// handled one at a time, but instead first come first serve by instruction? // Questions are how should a pipeline stage handle thread-specific stalls & // pipeline squashes void @@ -746,30 +795,32 @@ PipelineStage::processStage(bool &status_change) nextStage->size, stageNum + 1); } + if (instsProcessed > 0) { + ++runCycles; + idle = false; + } else { + ++idleCycles; + idle = true; + } + DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(), stageNum); - DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", stageBufferAvail(), - stageNum); + DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", + stageBufferAvail(), stageNum); } void PipelineStage::processThread(bool &status_change, ThreadID tid) { // If status is Running or idle, - // call stageInsts() + // call processInsts() // If status is Unblocking, // buffer any instructions coming from fetch - // continue trying to empty skid buffer + // continue trying to empty skid buffer // check if stall conditions have passed - if (stageStatus[tid] == Blocked) { - ;//++stageBlockedCycles; - } else if (stageStatus[tid] == Squashing) { - ;//++stageSquashCycles; - } - - // Stage should try to stage as many instructions as its bandwidth + // Stage should try to process as many instructions as its bandwidth // will allow, as long as it is not currently blocked. if (stageStatus[tid] == Running || stageStatus[tid] == Idle) { @@ -810,26 +861,22 @@ PipelineStage::processInsts(ThreadID tid) if (insts_available == 0) { DPRINTF(InOrderStage, "[tid:%u]: Nothing to do, breaking out" " early.\n",tid); - // Should I change the status to idle? - //++stageIdleCycles; return; } DynInstPtr inst; bool last_req_completed = true; - int insts_processed = 0; - while (insts_available > 0 && - insts_processed < stageWidth && + instsProcessed < stageWidth && (!nextStageValid || canSendInstToStage(stageNum+1)) && last_req_completed) { assert(!insts_to_stage.empty()); inst = insts_to_stage.front(); - DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] with " - "PC %#x\n", + DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] " + "with PC %#x\n", tid, inst->seqNum, inst->readPC()); if (inst->isSquashed()) { @@ -837,8 +884,6 @@ PipelineStage::processInsts(ThreadID tid) "squashed, skipping.\n", tid, inst->seqNum, inst->readPC()); - //++stageSquashedInsts; - insts_to_stage.pop(); --insts_available; @@ -846,8 +891,14 @@ PipelineStage::processInsts(ThreadID tid) continue; } + int reqs_processed = 0; + last_req_completed = processInstSchedule(inst, reqs_processed); - last_req_completed = processInstSchedule(inst); + // If the instruction isnt squashed & we've completed one request + // Then we can officially count this instruction toward the stage's + // bandwidth count + if (reqs_processed > 0) + instsProcessed++; // Don't let instruction pass to next stage if it hasnt completed // all of it's requests for this stage. @@ -856,16 +907,13 @@ PipelineStage::processInsts(ThreadID tid) // Send to Next Stage or Break Loop if (nextStageValid && !sendInstToNextStage(inst)) { - DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage %i.\n", - tid, inst->seqNum,inst->nextStage); + DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage" + " %i.\n", tid, inst->seqNum,inst->nextStage); break; } - insts_processed++; - insts_to_stage.pop(); - //++stageProcessedInsts; --insts_available; } @@ -883,12 +931,10 @@ PipelineStage::processInsts(ThreadID tid) } bool -PipelineStage::processInstSchedule(DynInstPtr inst) +PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) { bool last_req_completed = true; -#if TRACING_ON ThreadID tid = inst->readTid(); -#endif if (inst->nextResStage() == stageNum) { int res_stage_num = inst->nextResStage(); @@ -897,14 +943,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst) int res_num = inst->nextResource(); - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s." + "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); ResReqPtr req = cpu->resPool->request(res_num, inst); if (req->isCompleted()) { - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s completed.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s " + "completed.\n", tid, inst->seqNum, + cpu->resPool->name(res_num)); if (req->fault == NoFault) { inst->popSchedEntry(); @@ -912,12 +959,58 @@ PipelineStage::processInstSchedule(DynInstPtr inst) panic("%i: encountered %s fault!\n", curTick, req->fault->name()); } + + reqs_processed++; + + req->stagePasses++; } else { - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." + "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); last_req_completed = false; + if (req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + // Save Stalling Instruction + DPRINTF(ThreadModel, "[tid:%i] [sn:%i] Detected cache miss.\n", tid, inst->seqNum); + + DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n", + tid, inst->seqNum); + + switchedOutBuffer[tid] = inst; + switchedOutValid[tid] = true; + + // Remove Thread From Pipeline & Resource Pool + inst->squashingStage = stageNum; + inst->bdelaySeqNum = inst->seqNum; + cpu->squashFromMemStall(inst, tid); + + // Switch On Cache Miss + //===================== + // Suspend Thread at end of cycle + DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n", tid); + cpu->suspendContext(tid); + + // Activate Next Ready Thread at end of cycle + DPRINTF(ThreadModel, "Attempting to activate next ready thread due to" + " cache miss.\n"); + cpu->activateNextReadyContext(); + } + + // Mark request for deletion + // if it isnt currently being used by a resource + if (!req->hasSlot()) { + DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no slot in resource.\n", + inst->seqNum); + + cpu->reqRemoveList.push(req); + } else { + DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, in resource [slot:%i].\n", + inst->seqNum, req->getSlot()); + //req = cpu->dummyReq[tid]; + } + + break; } @@ -956,12 +1049,12 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) assert(next_stage >= 1); assert(prev_stage >= 0); - DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to stage %u.\n", tid, - stageNum+1); + DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to " + "stage %u.\n", tid, stageNum+1); if (!canSendInstToStage(inst->nextStage)) { - DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to stage %u.\n", tid, - stageNum+1); + DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to " + "stage %u.\n", tid, stageNum+1); return false; } @@ -969,12 +1062,14 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) if (nextStageQueueValid(inst->nextStage - 1)) { if (inst->seqNum > cpu->squashSeqNum[tid] && curTick == cpu->lastSquashCycle[tid]) { - DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping insertion " - "into stage %i queue.\n", tid, inst->seqNum, inst->nextStage); + DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping " + "insertion into stage %i queue.\n", tid, inst->seqNum, + inst->nextStage); } else { if (nextStageValid) { - DPRINTF(InOrderStage, "[tid:%u] %i slots available in next stage buffer.\n", - tid, cpu->pipelineStage[next_stage]->stageBufferAvail()); + DPRINTF(InOrderStage, "[tid:%u] %i slots available in next " + "stage buffer.\n", tid, + cpu->pipelineStage[next_stage]->stageBufferAvail()); } DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: being placed into " @@ -982,11 +1077,13 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) tid, inst->seqNum, toNextStageIndex, cpu->pipelineStage[prev_stage]->nextStageQueue->id()); - int next_stage_idx = cpu->pipelineStage[prev_stage]->nextStage->size; + int next_stage_idx = + cpu->pipelineStage[prev_stage]->nextStage->size; - // Place instructions in inter-stage communication struct for the next + // Place instructions in inter-stage communication struct for next // pipeline stage to read next cycle - cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] = inst; + cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] + = inst; ++(cpu->pipelineStage[prev_stage]->nextStage->size); diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 86ee98132..6c9cf0d99 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -178,7 +178,7 @@ class PipelineStage virtual void processInsts(ThreadID tid); /** Process all resources on an instruction's resource schedule */ - virtual bool processInstSchedule(DynInstPtr inst); + virtual bool processInstSchedule(DynInstPtr inst, int &reqs_processed); /** Is there room in the next stage buffer for this instruction? */ virtual bool canSendInstToStage(unsigned stage_num); @@ -235,11 +235,15 @@ class PipelineStage public: + virtual void activateThread(ThreadID tid); + /** Squashes if there is a PC-relative branch that was predicted * incorrectly. Sends squash information back to fetch. */ virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid); + virtual void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); + /** Squash instructions from stage buffer */ virtual void squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid); @@ -259,19 +263,33 @@ class PipelineStage /** List of active thread ids */ std::list<ThreadID> *activeThreads; + /** Buffer of instructions switched out to mem-stall. + * Only used when using SwitchOnCacheMiss threading model + * Used as 1-to-1 mapping between ThreadID and Entry. + */ + std::vector<DynInstPtr> switchedOutBuffer; + std::vector<bool> switchedOutValid; + + /** Instructions that we've processed this tick + * NOTE: "Processed" means completed at least 1 instruction request + */ + unsigned instsProcessed; + /** Queue of all instructions coming from previous stage on this cycle. */ std::queue<DynInstPtr> insts[ThePipeline::MaxThreads]; - /** Queue of instructions that are finished processing and ready to go next stage. - * This is used to prevent from processing an instrution more than once on any - * stage. NOTE: It is up to the PROGRAMMER must manage this as a queue + /** Queue of instructions that are finished processing and ready to go + * next stage. This is used to prevent from processing an instrution more + * than once on any stage. NOTE: It is up to the PROGRAMMER must manage + * this as a queue */ std::list<DynInstPtr> instsToNextStage; /** Skid buffer between previous stage and this one. */ std::queue<DynInstPtr> skidBuffer[ThePipeline::MaxThreads]; - /** Instruction used to signify that there is no *real* instruction in buffer slot */ + /** Instruction used to signify that there is no *real* instruction in + * buffer slot */ DynInstPtr dummyBufferInst; /** SeqNum of Squashing Branch Delay Instruction (used for MIPS) */ @@ -329,30 +347,27 @@ class PipelineStage /** Is Next Stage Valid? */ bool nextStageValid; + bool idle; + /** Source of possible stalls. */ struct Stalls { bool stage[ThePipeline::NumStages]; std::vector<ResReqPtr> resources; }; - /** Tracks which stages are telling decode to stall. */ + /** Tracks stage/resource stalls */ Stalls stalls[ThePipeline::MaxThreads]; - //@TODO: Use Stats for the pipeline stages - /** Stat for total number of idle cycles. */ - //Stats::Scalar stageIdleCycles; - /** Stat for total number of blocked cycles. */ - //Stats::Scalar stageBlockedCycles; - /** Stat for total number of normal running cycles. */ - //Stats::Scalar stageRunCycles; - /** Stat for total number of unblocking cycles. */ - //Stats::Scalar stageUnblockCycles; - /** Stat for total number of squashing cycles. */ - //Stats::Scalar stageSquashCycles; - /** Stat for total number of staged instructions. */ - //Stats::Scalar stageProcessedInsts; - /** Stat for total number of squashed instructions. */ - //Stats::Scalar stageSquashedInsts; + /** Number of cycles 0 instruction(s) are processed. */ + Stats::Scalar idleCycles; + + /** Number of cycles 1+ instructions are processed. */ + Stats::Scalar runCycles; + + /** Percentage of cycles 1+ instructions are processed. */ + Stats::Formula utilization; + + }; #endif diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc index ed72ab1d0..8ff26dce2 100644 --- a/src/cpu/inorder/pipeline_traits.cc +++ b/src/cpu/inorder/pipeline_traits.cc @@ -65,16 +65,18 @@ int getNextPriority(DynInstPtr &inst, int stage_num) void createFrontEndSchedule(DynInstPtr &inst) { - InstStage *I = inst->addStage(); - InstStage *E = inst->addStage(); - - I->needs(FetchSeq, FetchSeqUnit::AssignNextPC); - I->needs(ICache, CacheUnit::InitiateFetch); - - E->needs(ICache, CacheUnit::CompleteFetch); - E->needs(Decode, DecodeUnit::DecodeInst); - E->needs(BPred, BranchPredictor::PredictBranch); - E->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); + InstStage *F = inst->addStage(); + InstStage *D = inst->addStage(); + + // FETCH + F->needs(FetchSeq, FetchSeqUnit::AssignNextPC); + F->needs(ICache, CacheUnit::InitiateFetch); + + // DECODE + D->needs(ICache, CacheUnit::CompleteFetch); + D->needs(Decode, DecodeUnit::DecodeInst); + D->needs(BPred, BranchPredictor::PredictBranch); + D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); } bool createBackEndSchedule(DynInstPtr &inst) @@ -83,45 +85,48 @@ bool createBackEndSchedule(DynInstPtr &inst) return false; } - InstStage *E = inst->currentStage(); + InstStage *X = inst->addStage(); InstStage *M = inst->addStage(); - InstStage *A = inst->addStage(); InstStage *W = inst->addStage(); + // EXECUTE for (int idx=0; idx < inst->numSrcRegs(); idx++) { if (!idx || !inst->isStore()) { - E->needs(RegManager, UseDefUnit::ReadSrcReg, idx); + X->needs(RegManager, UseDefUnit::ReadSrcReg, idx); } } - if ( inst->isNonSpeculative() ) { // skip execution of non speculative insts until later } else if ( inst->isMemRef() ) { if ( inst->isLoad() ) { - E->needs(AGEN, AGENUnit::GenerateAddr); - E->needs(DCache, CacheUnit::InitiateReadData); + X->needs(AGEN, AGENUnit::GenerateAddr); } } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - E->needs(MDU, MultDivUnit::StartMultDiv); + X->needs(MDU, MultDivUnit::StartMultDiv); } else { - E->needs(ExecUnit, ExecutionUnit::ExecuteInst); + X->needs(ExecUnit, ExecutionUnit::ExecuteInst); } if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - M->needs(MDU, MultDivUnit::EndMultDiv); + X->needs(MDU, MultDivUnit::EndMultDiv); } + // MEMORY if ( inst->isLoad() ) { - M->needs(DCache, CacheUnit::CompleteReadData); + M->needs(DCache, CacheUnit::InitiateReadData); } else if ( inst->isStore() ) { M->needs(RegManager, UseDefUnit::ReadSrcReg, 1); M->needs(AGEN, AGENUnit::GenerateAddr); M->needs(DCache, CacheUnit::InitiateWriteData); } - if ( inst->isStore() ) { - A->needs(DCache, CacheUnit::CompleteWriteData); + + // WRITEBACK + if ( inst->isLoad() ) { + W->needs(DCache, CacheUnit::CompleteReadData); + } else if ( inst->isStore() ) { + W->needs(DCache, CacheUnit::CompleteWriteData); } if ( inst->isNonSpeculative() ) { diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 3c28894e7..f039b9e5d 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -53,8 +53,8 @@ namespace ThePipeline { const unsigned StageWidth = 1; const unsigned BackEndStartStage = 2; - // Enumerated List of Resources The Pipeline Uses - enum ResourceList { + // List of Resources The Pipeline Uses + enum ResourceId { FetchSeq = 0, ICache, Decode, @@ -94,6 +94,7 @@ namespace ThePipeline { stageNum(stage_num), resNum(res_num), cmd(_cmd), idx(_idx), priority(_priority) { } + virtual ~ScheduleEntry(){} // Stage number to perform this service. @@ -113,7 +114,8 @@ namespace ThePipeline { }; struct entryCompare { - bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) const + bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) + const { // Prioritize first by stage number that the resource is needed if (lhs->stageNum > rhs->stageNum) { @@ -158,7 +160,6 @@ namespace ThePipeline { stageNum, nextTaskPriority++, unit, request, param )); } - }; }; diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc index 51782a588..7fac0a905 100644 --- a/src/cpu/inorder/reg_dep_map.cc +++ b/src/cpu/inorder/reg_dep_map.cc @@ -235,3 +235,27 @@ RegDepMap::findBypassInst(unsigned idx) return NULL; } + +void +RegDepMap::dump() +{ + + for (int idx=0; idx < regMap.size(); idx++) { + + if (regMap[idx].size() > 0) { + cprintf("Reg #%i (size:%i): ", idx, regMap[idx].size()); + + std::list<DynInstPtr>::iterator list_it = regMap[idx].begin(); + std::list<DynInstPtr>::iterator list_end = regMap[idx].end(); + + while (list_it != list_end) { + cprintf("[sn:%i] ", (*list_it)->seqNum); + + list_it++; + } + + cprintf("\n"); + } + + } +} diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh index b78e211bb..cb9d35bf4 100644 --- a/src/cpu/inorder/reg_dep_map.hh +++ b/src/cpu/inorder/reg_dep_map.hh @@ -88,6 +88,8 @@ class RegDepMap /** Size of Dependency of Map */ int depSize(unsigned idx); + void dump(); + protected: // Eventually make this a map of lists for // efficiency sake! diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index cb5681bc1..e63925fe8 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -47,6 +47,7 @@ Resource::Resource(string res_name, int res_id, int res_width, Resource::~Resource() { delete [] resourceEvent; + delete deniedReq; } @@ -80,7 +81,9 @@ Resource::regStats() { instReqsProcessed .name(name() + ".instReqsProcessed") - .desc("Number of Instructions Requests that completed in this resource."); + .desc("Number of Instructions Requests that completed in " + "this resource.") + .prereq(instReqsProcessed); } int @@ -98,11 +101,6 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { - DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource [slot:%i].\n", - reqMap[slot_idx]->inst->readTid(), - reqMap[slot_idx]->inst->seqNum, - slot_idx); - // Put slot number on this resource's free list availSlots.push_back(slot_idx); @@ -159,7 +157,8 @@ Resource::getSlot(DynInstPtr inst) while (map_it != map_end) { if ((*map_it).second) { - DPRINTF(Resource, "Currently Serving request from: [tid:%i] [sn:%i].\n", + DPRINTF(Resource, "Currently Serving request from: " + "[tid:%i] [sn:%i].\n", (*map_it).second->getInst()->readTid(), (*map_it).second->getInst()->seqNum); } @@ -176,7 +175,7 @@ Resource::request(DynInstPtr inst) // See if the resource is already serving this instruction. // If so, use that request; bool try_request = false; - int slot_num; + int slot_num = -1; int stage_num; ResReqPtr inst_req = findRequest(inst); @@ -202,10 +201,12 @@ Resource::request(DynInstPtr inst) inst_req = getRequest(inst, stage_num, id, slot_num, cmd); if (inst->staticInst) { - DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource.\n", + DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this " + "resource.\n", inst->readTid(), inst->seqNum); } else { - DPRINTF(Resource, "[tid:%i]: instruction requesting this resource.\n", + DPRINTF(Resource, "[tid:%i]: instruction requesting this " + "resource.\n", inst->readTid()); } @@ -232,7 +233,8 @@ Resource::requestAgain(DynInstPtr inst, bool &do_request) do_request = true; if (inst->staticInst) { - DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource again.\n", + DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource " + "again.\n", inst->readTid(), inst->seqNum); } else { DPRINTF(Resource, "[tid:%i]: requesting this resource again.\n", @@ -254,15 +256,22 @@ Resource::findRequest(DynInstPtr inst) map<int, ResReqPtr>::iterator map_it = reqMap.begin(); map<int, ResReqPtr>::iterator map_end = reqMap.end(); + bool found = false; + ResReqPtr req = NULL; + while (map_it != map_end) { if ((*map_it).second && - (*map_it).second->getInst() == inst) { - return (*map_it).second; + (*map_it).second->getInst() == inst) { + req = (*map_it).second; + //return (*map_it).second; + assert(found == false); + found = true; } map_it++; } - return NULL; + return req; + //return NULL; } void @@ -334,6 +343,12 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } } +void +Resource::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, + ThreadID tid) +{ + squash(inst, stage_num, squash_seq_num, tid); +} Tick Resource::ticks(int num_cycles) @@ -394,22 +409,72 @@ Resource::unscheduleEvent(DynInstPtr inst) int ResourceRequest::resReqID = 0; -int ResourceRequest::resReqCount = 0; +int ResourceRequest::maxReqCount = 0; + +ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, + int stage_num, int res_idx, int slot_num, + unsigned _cmd) + : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), + resIdx(res_idx), slotNum(slot_num), completed(false), + squashed(false), processing(false), memStall(false) +{ +#ifdef DEBUG + reqID = resReqID++; + res->cpu->resReqCount++; + DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, + res->cpu->resReqCount); + + if (res->cpu->resReqCount > 100) { + fatal("Too many undeleted resource requests. Memory leak?\n"); + } + + if (res->cpu->resReqCount > maxReqCount) { + maxReqCount = res->cpu->resReqCount; + res->cpu->maxResReqCount = maxReqCount; + } + +#endif + + stagePasses = 0; + complSlotNum = -1; + +} + +ResourceRequest::~ResourceRequest() +{ +#ifdef DEBUG + res->cpu->resReqCount--; + DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, + res->cpu->resReqCount); +#endif +} void ResourceRequest::done(bool completed) { - DPRINTF(Resource, "%s done with request from [sn:%i] [tid:%i].\n", - res->name(), inst->seqNum, inst->readTid()); + DPRINTF(Resource, "%s [slot:%i] done with request from [sn:%i] [tid:%i].\n", + res->name(), slotNum, inst->seqNum, inst->readTid()); setCompleted(completed); - // Add to remove list - res->cpu->reqRemoveList.push(res->reqMap[slotNum]); - + // Used for debugging purposes + if (completed) { + complSlotNum = slotNum; + + // Would like to start a convention such as all requests deleted in resources/pipeline + // but a little more complex then it seems... + // For now, all COMPLETED requests deleted in resource.. + // all FAILED requests deleted in pipeline stage + // *all SQUASHED requests deleted in resource + res->cpu->reqRemoveList.push(res->reqMap[slotNum]); + } + // Free Slot So Another Instruction Can Use This Resource res->freeSlot(slotNum); + // change slot # to -1, since we check slotNum to see if request is still valid + slotNum = -1; + res->instReqsProcessed++; } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 605b7f690..b9650df18 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -70,7 +70,8 @@ class Resource { /** Define this function if resource, has a port to connect to an outside * simulation object. */ - virtual Port* getPort(const std::string &if_name, int idx) { return NULL; } + virtual Port* getPort(const std::string &if_name, int idx) + { return NULL; } /** Return ID for this resource */ int getId() { return id; } @@ -92,6 +93,14 @@ class Resource { */ virtual void deactivateThread(ThreadID tid); + /** Resources that care about thread activation override this. */ + virtual void suspendThread(ThreadID tid) { } + + /** Will be called the cycle before a context switch. Any bookkeeping + * that needs to be kept for that, can be done here + */ + virtual void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { } + /** Resources that care when an instruction has been graduated * can override this */ @@ -114,9 +123,9 @@ class Resource { /** Free a resource slot */ virtual void freeSlot(int slot_idx); - /** Request usage of a resource for this instruction. If this instruction already - * has made this request to this resource, and that request is uncompleted - * this function will just return that request + /** Request usage of a resource for this instruction. If this instruction + * already has made this request to this resource, and that request is + * uncompleted this function will just return that request */ virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, @@ -155,6 +164,9 @@ class Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + virtual void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** The number of instructions available that this resource can * can still process */ @@ -166,7 +178,8 @@ class Resource { /** Schedule resource event, regardless of its current state. */ void scheduleEvent(int slot_idx, int delay); - /** Find instruction in list, Schedule resource event, regardless of its current state. */ + /** Find instruction in list, Schedule resource event, regardless of its + * current state. */ bool scheduleEvent(DynInstPtr inst, int delay); /** Unschedule resource event, regardless of its current state. */ @@ -303,30 +316,14 @@ class ResourceRequest static int resReqID; - static int resReqCount; - + static int maxReqCount; + public: ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, - int res_idx, int slot_num, unsigned _cmd) - : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), - resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), waiting(false) - { - reqID = resReqID++; - resReqCount++; - DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, resReqCount); - - if (resReqCount > 100) { - fatal("Too many undeleted resource requests. Memory leak?\n"); - } - } - - virtual ~ResourceRequest() - { - resReqCount--; - DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, resReqCount); - } - + int res_idx, int slot_num, unsigned _cmd); + + virtual ~ResourceRequest(); + int reqID; /** Acknowledge that this is a request is done and remove @@ -334,6 +331,8 @@ class ResourceRequest */ void done(bool completed = true); + short stagePasses; + ///////////////////////////////////////////// // // GET RESOURCE REQUEST IDENTIFICATION / INFO @@ -342,8 +341,11 @@ class ResourceRequest /** Get Resource Index */ int getResIdx() { return resIdx; } + /** Get Slot Number */ int getSlot() { return slotNum; } + int getComplSlot() { return complSlotNum; } + bool hasSlot() { return slotNum >= 0; } /** Get Stage Number */ int getStageNum() { return stageNum; } @@ -366,6 +368,9 @@ class ResourceRequest /** Instruction being used */ DynInstPtr inst; + /** Not guaranteed to be set, used for debugging */ + InstSeqNum seqNum; + /** Fault Associated With This Resource Request */ Fault fault; @@ -390,8 +395,8 @@ class ResourceRequest void setProcessing() { processing = true; } /** Get/Set IsWaiting variables */ - bool isWaiting() { return waiting; } - void setWaiting() { waiting = true; } + bool isMemStall() { return memStall; } + void setMemStall(bool stall = true) { memStall = stall; } protected: /** Resource Identification */ @@ -399,12 +404,14 @@ class ResourceRequest int stageNum; int resIdx; int slotNum; - - /** Resource Status */ + int complSlotNum; + + /** Resource Request Status */ bool completed; bool squashed; bool processing; - bool waiting; + + bool memStall; }; #endif //__CPU_INORDER_RESOURCE_HH__ diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 0d78c232b..74bf4f03b 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -41,45 +41,74 @@ using namespace ThePipeline; ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) : cpu(_cpu) { - //@todo: use this function to instantiate the resources in resource pool. This will help in the - //auto-generation of this pipeline model. + //@todo: use this function to instantiate the resources in resource pool. + //This will help in the auto-generation of this pipeline model. //ThePipeline::addResources(resources, memObjects); // Declare Resource Objects // name - id - bandwidth - latency - CPU - Parameters // -------------------------------------------------- - resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params)); + resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, + StageWidth * 2, 0, _cpu, params)); memObjects.push_back(ICache); - resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("icache_port", ICache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new DecodeUnit("Decode-Unit", Decode, StageWidth, 0, _cpu, params)); + resources.push_back(new DecodeUnit("Decode-Unit", Decode, + StageWidth, 0, _cpu, params)); - resources.push_back(new BranchPredictor("Branch-Predictor", BPred, StageWidth, 0, _cpu, params)); + resources.push_back(new BranchPredictor("Branch-Predictor", BPred, + StageWidth, 0, _cpu, params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, + 0, _cpu, params)); - resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new AGENUnit("AGEN-Unit", AGEN, StageWidth, 0, _cpu, params)); + resources.push_back(new AGENUnit("AGEN-Unit", AGEN, + StageWidth, 0, _cpu, params)); - resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, StageWidth, 0, _cpu, params)); + resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, + StageWidth, 0, _cpu, params)); - resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params)); + resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, + params)); memObjects.push_back(DCache); - resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("dcache_port", DCache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new GraduationUnit("Graduation-Unit", Grad, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new GraduationUnit("Graduation-Unit", Grad, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, + 0, _cpu, params)); } +ResourcePool::~ResourcePool() +{ + cout << "Deleting resources ..." << endl; + + for (int i=0; i < resources.size(); i++) { + DPRINTF(Resource, "Deleting resource: %s.\n", resources[i]->name()); + + delete resources[i]; + } +} + + void ResourcePool::init() { for (int i=0; i < resources.size(); i++) { - DPRINTF(Resource, "Initializing resource: %s.\n", resources[i]->name()); + DPRINTF(Resource, "Initializing resource: %s.\n", + resources[i]->name()); resources[i]->init(); } @@ -113,8 +142,8 @@ ResourcePool::getPort(const std::string &if_name, int idx) int obj_idx = memObjects[i]; Port *port = resources[obj_idx]->getPort(if_name, idx); if (port != NULL) { - DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", if_name, - resources[obj_idx]->name(), obj_idx); + DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", + if_name, resources[obj_idx]->name(), obj_idx); return port; } } @@ -131,7 +160,8 @@ ResourcePool::getPortIdx(const std::string &port_name) unsigned obj_idx = memObjects[i]; Port *port = resources[obj_idx]->getPort(port_name, obj_idx); if (port != NULL) { - DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, port_name); + DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, + port_name); return obj_idx; } } @@ -151,6 +181,25 @@ ResourcePool::getResIdx(const std::string &res_name) return idx; } + panic("Can't find resource idx for: %s\n", res_name); + return 0; +} + +unsigned +ResourcePool::getResIdx(const ThePipeline::ResourceId &res_id) +{ + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + if (resources[idx]->getId() == res_id) + return idx; + } + + // todo: change return value to int and return a -1 here + // maybe even have enumerated type + // panic for now... + panic("Can't find resource idx for: %i\n", res_id); + return 0; } @@ -167,7 +216,8 @@ void ResourcePool::squash(DynInstPtr inst, int res_idx, InstSeqNum done_seq_num, ThreadID tid) { - resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, tid); + resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, + tid); } int @@ -182,6 +232,12 @@ ResourcePool::slotsInUse(int res_idx) return resources[res_idx]->slotsInUse(); } +//@todo: split this function and call this version schedulePoolEvent +// and use this scheduleEvent for scheduling a specific event on +// a resource +//@todo: For arguments that arent being used in a ResPoolEvent, a dummyParam +// or some typedef can be used to signify what's important info +// to the event construction void ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, int delay, int res_idx, ThreadID tid) @@ -192,23 +248,45 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, { case InOrderCPU::ActivateThread: { - DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event " + "for tick %i, [tid:%i].\n", curTick + delay, + inst->readTid()); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } + break; + + case InOrderCPU::HaltThread: + case InOrderCPU::DeactivateThread: + { + + DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + tid); + + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } break; case InOrderCPU::SuspendThread: - case InOrderCPU::DeallocateThread: { - DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool Event for tick %i.\n", + DPRINTF(Resource, "Scheduling Suspend Thread Resource Pool Event for tick %i.\n", curTick + delay); ResPoolEvent *res_pool_event = new ResPoolEvent(this, e_type, @@ -224,34 +302,68 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, case ResourcePool::InstGraduated: { - DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->seqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::SquashAll: { - DPRINTF(Resource, "Scheduling Squash Resource Pool Event for tick %i.\n", + DPRINTF(Resource, "Scheduling Squash Resource Pool Event for " + "tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } + break; + + case InOrderCPU::SquashFromMemStall: + { + DPRINTF(Resource, "Scheduling Squash Due to Memory Stall Resource " + "Pool Event for tick %i.\n", + curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum - 1, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } + break; + + case ResourcePool::UpdateAfterContextSwitch: + { + DPRINTF(Resource, "Scheduling UpdatePC Resource Pool Event for tick %i.\n", curTick + delay); ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, inst, inst->squashingStage, - inst->bdelaySeqNum, + inst->seqNum, inst->readTid()); mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + } break; default: - DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); - ; // If Resource Pool doesnt recognize event, we ignore it. + DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", + InOrderCPU::eventNames[e_type]); } } @@ -265,8 +377,9 @@ void ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above [sn:%i].\n", - stage_num, tid, done_seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting Squash All Event " + " starting w/stage %i for all instructions above [sn:%i].\n", + tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -276,23 +389,49 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num, } void -ResourcePool::activateAll(ThreadID tid) +ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting SquashDueToMemStall Event" + " starting w/stage %i for all instructions above [sn:%i].\n", + tid, stage_num, done_seq_num); int num_resources = resources.size(); for (int idx = 0; idx < num_resources; idx++) { - resources[idx]->activateThread(tid); + resources[idx]->squashDueToMemStall(inst, stage_num, done_seq_num, + tid); } } void +ResourcePool::activateAll(ThreadID tid) +{ + bool do_activate = cpu->threadModel != InOrderCPU::SwitchOnCacheMiss || + cpu->numActiveThreads() < 1 || + cpu->activeThreadId() == tid; + + + if (do_activate) { + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " + "resources.\n", tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->activateThread(tid); + } + } else { + DPRINTF(Resource, "[tid:%i] Ignoring Thread Activation to all " + "resources.\n", tid); + } +} + +void ResourcePool::deactivateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -302,10 +441,23 @@ ResourcePool::deactivateAll(ThreadID tid) } void +ResourcePool::suspendAll(ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Suspension to all resources.\n", + tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->suspendThread(tid); + } +} + +void ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all resources.\n", - tid, seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all " + "resources.\n", tid, seq_num); int num_resources = resources.size(); @@ -314,10 +466,18 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) } } -ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) - : Event(CPU_Tick_Pri), resPool(_resPool), - eventType((InOrderCPU::CPUEventType) Default) -{ } +void +ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Broadcasting Update PC to all resources.\n", + tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->updateAfterContextSwitch(inst, tid); + } +} ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool, InOrderCPU::CPUEventType e_type, @@ -340,11 +500,15 @@ ResourcePool::ResPoolEvent::process() resPool->activateAll(tid); break; - case InOrderCPU::SuspendThread: - case InOrderCPU::DeallocateThread: + case InOrderCPU::DeactivateThread: + case InOrderCPU::HaltThread: resPool->deactivateAll(tid); break; + case InOrderCPU::SuspendThread: + resPool->suspendAll(tid); + break; + case ResourcePool::InstGraduated: resPool->instGraduated(seqNum, tid); break; @@ -353,6 +517,14 @@ ResourcePool::ResPoolEvent::process() resPool->squashAll(inst, stageNum, seqNum, tid); break; + case InOrderCPU::SquashFromMemStall: + resPool->squashDueToMemStall(inst, stageNum, seqNum, tid); + break; + + case ResourcePool::UpdateAfterContextSwitch: + resPool->updateAfterContextSwitch(inst, tid); + break; + default: fatal("Unrecognized Event Type"); } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 016fae2bf..60d35ab61 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -63,6 +63,7 @@ class ResourcePool { enum ResPoolEventType { InstGraduated = InOrderCPU::NumCPUEvents, SquashAll, + UpdateAfterContextSwitch, Default }; @@ -85,9 +86,6 @@ class ResourcePool { public: /** Constructs a resource event. */ - ResPoolEvent(ResourcePool *_resPool); - - /** Constructs a resource event. */ ResPoolEvent(ResourcePool *_resPool, InOrderCPU::CPUEventType e_type, DynInstPtr _inst, @@ -123,8 +121,8 @@ class ResourcePool { }; public: - ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ResourcePool() {} + ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); + ~ResourcePool(); std::string name(); @@ -143,6 +141,7 @@ class ResourcePool { /** Returns a specific resource. */ unsigned getResIdx(const std::string &res_name); + unsigned getResIdx(const ThePipeline::ResourceId &res_id); /** Returns a pointer to a resource */ Resource* getResource(int res_idx) { return resources[res_idx]; } @@ -160,12 +159,24 @@ class ResourcePool { void squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid); + /** Squash Resources in Pool after a memory stall + * NOTE: Only use during Switch-On-Miss Thread model + */ + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid); + /** Activate Thread in all resources */ void activateAll(ThreadID tid); /** De-Activate Thread in all resources */ void deactivateAll(ThreadID tid); + /** De-Activate Thread in all resources */ + void suspendAll(ThreadID tid); + + /** Broadcast Context Switch Update to all resources */ + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Broadcast graduation to all resources */ void instGraduated(InstSeqNum seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index eb66e10f8..cb1861ea9 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -40,6 +40,7 @@ #include "cpu/inorder/resources/cache_unit.hh" #include "cpu/inorder/pipeline_traits.hh" #include "cpu/inorder/cpu.hh" +#include "cpu/inorder/resource_pool.hh" #include "mem/request.hh" using namespace std; @@ -49,14 +50,14 @@ using namespace ThePipeline; Tick CacheUnit::CachePort::recvAtomic(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvAtomic callback!"); + panic("CacheUnit::CachePort doesn't expect recvAtomic callback!"); return curTick; } void CacheUnit::CachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + panic("CacheUnit::CachePort doesn't expect recvFunctional callback!"); } void @@ -65,7 +66,7 @@ CacheUnit::CachePort::recvStatusChange(Status status) if (status == RangeChange) return; - panic("DefaultFetch doesn't expect recvStatusChange callback!"); + panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!"); } bool @@ -84,8 +85,7 @@ CacheUnit::CachePort::recvRetry() CacheUnit::CacheUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - retryPkt(NULL), retrySlot(-1), cacheBlocked(false), - predecoder(NULL) + cachePortBlocked(false), predecoder(NULL) { cachePort = new CachePort(this); @@ -131,18 +131,24 @@ CacheUnit::init() int CacheUnit::getSlot(DynInstPtr inst) { + ThreadID tid = inst->readTid(); + if (tlbBlocked[inst->threadNumber]) { return -1; } - if (!inst->validMemAddr()) { - panic("Mem. Addr. must be set before requesting cache access\n"); + // For a Split-Load, the instruction would have processed once already + // causing the address to be unset. + if (!inst->validMemAddr() && !inst->splitInst) { + panic("[tid:%i][sn:%i] Mem. Addr. must be set before requesting cache access\n", + inst->readTid(), inst->seqNum); } Addr req_addr = inst->getMemAddr(); if (resName == "icache_port" || - find(addrList.begin(), addrList.end(), req_addr) == addrList.end()) { + find(addrList[tid].begin(), addrList[tid].end(), req_addr) == + addrList[tid].end()) { int new_slot = Resource::getSlot(inst); @@ -150,37 +156,115 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; inst->memTime = curTick; - addrList.push_back(req_addr); - addrMap[req_addr] = inst->seqNum; - DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", - inst->readTid(), inst->seqNum, req_addr); + setAddrDependency(inst); return new_slot; } else { - DPRINTF(InOrderCachePort, - "Denying request because there is an outstanding" + // Allow same instruction multiple accesses to same address + // should only happen maybe after a squashed inst. needs to replay + if (addrMap[tid][req_addr] == inst->seqNum) { + int new_slot = Resource::getSlot(inst); + + if (new_slot == -1) + return -1; + + return new_slot; + } else { + DPRINTF(InOrderCachePort, + "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", - req_addr, addrMap[req_addr], inst->memTime); - return -1; + inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime); + return -1; + } } + + return -1; } void -CacheUnit::freeSlot(int slot_num) +CacheUnit::setAddrDependency(DynInstPtr inst) { - vector<Addr>::iterator vect_it = find(addrList.begin(), addrList.end(), - reqMap[slot_num]->inst->getMemAddr()); - assert(vect_it != addrList.end()); + Addr req_addr = inst->getMemAddr(); + ThreadID tid = inst->readTid(); + addrList[tid].push_back(req_addr); + addrMap[tid][req_addr] = inst->seqNum; DPRINTF(InOrderCachePort, - "[tid:%i]: Address %08p removed from dependency list\n", - reqMap[slot_num]->inst->readTid(), (*vect_it)); + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); +} + +void +CacheUnit::removeAddrDependency(DynInstPtr inst) +{ + ThreadID tid = inst->readTid(); + + Addr mem_addr = inst->getMemAddr(); + + // Erase from Address List + vector<Addr>::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), + mem_addr); + assert(vect_it != addrList[tid].end() || inst->splitInst); + + if (vect_it != addrList[tid].end()) { + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i] Address %08p removed from dependency list\n", + inst->readTid(), inst->seqNum, (*vect_it)); + + addrList[tid].erase(vect_it); + + // Erase From Address Map (Used for Debugging) + addrMap[tid].erase(addrMap[tid].find(mem_addr)); + } + + +} + +ResReqPtr +CacheUnit::findRequest(DynInstPtr inst) +{ + map<int, ResReqPtr>::iterator map_it = reqMap.begin(); + map<int, ResReqPtr>::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast<CacheRequest*>((*map_it).second); + assert(cache_req); + + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == inst->resSched.top()->idx) { + return cache_req; + } + map_it++; + } + + return NULL; +} + +ResReqPtr +CacheUnit::findSplitRequest(DynInstPtr inst, int idx) +{ + map<int, ResReqPtr>::iterator map_it = reqMap.begin(); + map<int, ResReqPtr>::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast<CacheRequest*>((*map_it).second); + assert(cache_req); - addrList.erase(vect_it); + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == idx) { + return cache_req; + } + map_it++; + } - Resource::freeSlot(slot_num); + return NULL; } + ResReqPtr CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -195,6 +279,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, switch (sched_entry->cmd) { + case InitSecondSplitRead: + pkt_cmd = MemCmd::ReadReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Read request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateReadData: pkt_cmd = MemCmd::ReadReq; @@ -203,6 +295,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, inst->readTid(), inst->seqNum, inst->getMemAddr()); break; + case InitSecondSplitWrite: + pkt_cmd = MemCmd::WriteReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Write request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateWriteData: pkt_cmd = MemCmd::WriteReq; @@ -226,7 +326,8 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, return new CacheRequest(this, inst, stage_num, id, slot_num, sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId()); + 0/*flags*/, this->cpu->readCpuId(), + inst->resSched.top()->idx); } void @@ -237,15 +338,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // Check to see if this instruction is requesting the same command // or a different one - if (cache_req->cmd != inst->resSched.top()->cmd) { + if (cache_req->cmd != inst->resSched.top()->cmd && + cache_req->instIdx == inst->resSched.top()->idx) { // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Updating the command for this instruction\n", - inst->readTid(), inst->seqNum); + "[tid:%i]: [sn:%i]: Updating the command for this " + "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else { + } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead && + inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -271,12 +374,25 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->fetchMemReq; } else { - inst->dataMemReq = new Request(inst->readTid(), aligned_addr, + if (!cache_req->is2ndSplit()) { + inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr, acc_size, flags, inst->readPC(), cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->dataMemReq; + } else { + assert(inst->splitInst); + + inst->splitMemReq = new Request(cpu->asid[tid], + inst->split2ndAddr, + acc_size, + flags, + inst->readPC(), + cpu->readCpuId(), + tid); + cache_req->memReq = inst->splitMemReq; + } } - + cache_req->fault = _tlb->translateAtomic(cache_req->memReq, @@ -311,14 +427,93 @@ Fault CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst)); - assert(cache_req); + assert(cache_req && "Can't Find Instruction for Read!"); + + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "[sn:%i] Split Read Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + + if (secondAddr > addr && !inst->split2ndAccess) { + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + addr, secondAddr); + + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitMemData = new uint8_t[dataSize]; + inst->splitTotalSize = dataSize; + + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitRead, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitRead, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read); + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndDataPtr = inst->splitMemData + dataSize; + inst->split2ndFlags = flags; + } + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, NULL); + if (!cache_req->splitAccess) { + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, NULL); + } else { + if (!inst->split2ndAccess) { + cache_req->reqData = inst->splitMemData; + } else { + cache_req->reqData = inst->split2ndDataPtr; + } + + doCacheAccess(inst, NULL, cache_req); + } } return cache_req->fault; @@ -330,16 +525,93 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, uint64_t *write_res) { CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst)); - assert(cache_req); - - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write); + assert(cache_req && "Can't Find Instruction for Write!"); + + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + if (secondAddr > addr && !inst->split2ndAccess) { + + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (1 of 2) for (%#x, %#x).\n", inst->seqNum, + addr, secondAddr); + + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitTotalSize = dataSize; + + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitWrite, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitWrite, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } + + + + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndStoreDataPtr = &cache_req->inst->storeData; + inst->split2ndStoreDataPtr += dataSize; + inst->split2ndFlags = flags; + inst->splitInstSked = true; + } + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, write_res); + if (!cache_req->splitAccess) { + // Remove this line since storeData is saved in INST? + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, write_res); + } else { + doCacheAccess(inst, write_res, cache_req); + } + } - + return cache_req->fault; } @@ -347,8 +619,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, void CacheUnit::execute(int slot_num) { - if (cacheBlocked) { - DPRINTF(InOrderCachePort, "Cache Blocked. Cannot Access\n"); + if (cachePortBlocked) { + DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); return; } @@ -359,6 +631,8 @@ CacheUnit::execute(int slot_num) #if TRACING_ON ThreadID tid = inst->readTid(); int seq_num = inst->seqNum; + std::string acc_type = "write"; + #endif cache_req->fault = NoFault; @@ -390,10 +664,14 @@ CacheUnit::execute(int slot_num) } case InitiateReadData: +#if TRACING_ON + acc_type = "read"; +#endif case InitiateWriteData: + DPRINTF(InOrderCachePort, - "[tid:%u]: Initiating data access to %s for addr. %08p\n", - tid, name(), cache_req->inst->getMemAddr()); + "[tid:%u]: [sn:%i] Initiating data %s access to %s for addr. %08p\n", + tid, inst->seqNum, acc_type, name(), cache_req->inst->getMemAddr()); inst->setCurResSlot(slot_num); @@ -402,9 +680,29 @@ CacheUnit::execute(int slot_num) } else { inst->initiateAcc(); } + + break; + + case InitSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->split2ndAddr); + inst->split2ndAccess = true; + assert(inst->split2ndAddr != 0); + read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + break; + + case InitSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->getMemAddr()); + inst->split2ndAccess = true; + assert(inst->split2ndAddr != 0); + write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags, NULL); break; + case CompleteFetch: if (cache_req->isMemAccComplete()) { DPRINTF(InOrderCachePort, @@ -415,16 +713,24 @@ CacheUnit::execute(int slot_num) DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n", tid, seq_num, inst->staticInst->disassemble(inst->PC)); + removeAddrDependency(inst); + delete cache_req->dataPkt; + + // Do not stall and switch threads for fetch... for now.. + // TODO: We need to detect cache misses for latencies > 1 + // cache_req->setMemStall(false); + cache_req->done(); } else { DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", + "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", tid, inst->seqNum); DPRINTF(InOrderStall, "STALL: [tid:%i]: Fetch miss from %08p\n", tid, cache_req->inst->readPC()); cache_req->setCompleted(false); + //cache_req->setMemStall(true); } break; @@ -437,14 +743,55 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); + cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", tid, cache_req->inst->getMemAddr()); cache_req->setCompleted(false); + cache_req->setMemStall(true); } break; + case CompleteSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Read Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + removeAddrDependency(inst); + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; + + case CompleteSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Write Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + removeAddrDependency(inst); + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; + default: fatal("Unrecognized command to %s", resName); } @@ -462,8 +809,7 @@ CacheUnit::prefetch(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -482,8 +828,7 @@ CacheUnit::writeHint(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -491,15 +836,21 @@ CacheUnit::writeHint(DynInstPtr inst) // @TODO: Split into doCacheRead() and doCacheWrite() Fault -CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) +CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split_req) { Fault fault = NoFault; #if TRACING_ON ThreadID tid = inst->readTid(); #endif - CacheReqPtr cache_req - = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]); + CacheReqPtr cache_req; + + if (split_req == NULL) { + cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]); + } else{ + cache_req = split_req; + } + assert(cache_req); // Check for LL/SC and if so change command @@ -510,25 +861,28 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (cache_req->pktCmd == MemCmd::WriteReq) { cache_req->pktCmd = cache_req->memReq->isSwap() ? MemCmd::SwapReq : - (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); + (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq + : MemCmd::WriteReq); } cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd, - Packet::Broadcast); + Packet::Broadcast, cache_req->instIdx); if (cache_req->dataPkt->isRead()) { cache_req->dataPkt->dataStatic(cache_req->reqData); - } else if (cache_req->dataPkt->isWrite()) { - cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); - + } else if (cache_req->dataPkt->isWrite()) { + if (inst->split2ndAccess) { + cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr); + } else { + cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); + } + if (cache_req->memReq->isCondSwap()) { assert(write_res); cache_req->memReq->setExtraData(*write_res); } } - cache_req->dataPkt->time = curTick; - bool do_access = true; // flag to suppress cache access Request *memReq = cache_req->dataPkt->req; @@ -546,28 +900,18 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (do_access) { if (!cachePort->sendTiming(cache_req->dataPkt)) { DPRINTF(InOrderCachePort, - "[tid:%i] [sn:%i] is waiting to retry request\n", - tid, inst->seqNum); - - retrySlot = cache_req->getSlot(); - retryReq = cache_req; - retryPkt = cache_req->dataPkt; - - cacheStatus = cacheWaitRetry; - - //cacheBlocked = true; - - DPRINTF(InOrderStall, "STALL: \n"); - + "[tid:%i] [sn:%i] cannot access cache, because port " + "is blocked. now waiting to retry request\n", tid, + inst->seqNum); cache_req->setCompleted(false); + cachePortBlocked = true; } else { DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] is now waiting for cache response\n", tid, inst->seqNum); cache_req->setCompleted(); cache_req->setMemAccPending(); - cacheStatus = cacheWaitResponse; - cacheBlocked = false; + cachePortBlocked = false; } } else if (!do_access && memReq->isLLSC()){ // Store-Conditional instructions complete even if they "failed" @@ -594,6 +938,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) { // Cast to correct packet type CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt); + assert(cache_pkt); if (cache_pkt->cacheReq->isSquashed()) { @@ -601,9 +946,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", cache_pkt->cacheReq->getInst()->readTid(), cache_pkt->cacheReq->getInst()->seqNum); + DPRINTF(RefCount, + "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", + cache_pkt->cacheReq->getTid(), + cache_pkt->cacheReq->seqNum); cache_pkt->cacheReq->done(); delete cache_pkt; + + cpu->wakeCPU(); + return; } @@ -615,7 +967,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) // Cast to correct request type CacheRequest *cache_req = dynamic_cast<CacheReqPtr>( - findRequest(cache_pkt->cacheReq->getInst())); + findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx)); + + if (!cache_req) { + warn( + "[tid:%u]: [sn:%i]: Can't find slot for cache access to addr. %08p\n", + cache_pkt->cacheReq->getInst()->readTid(), + cache_pkt->cacheReq->getInst()->seqNum, + cache_pkt->cacheReq->getInst()->getMemAddr()); + } + assert(cache_req); @@ -641,8 +1002,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) ExtMachInst ext_inst; StaticInstPtr staticInst = NULL; Addr inst_pc = inst->readPC(); - MachInst mach_inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> - (cache_pkt->getPtr<uint8_t>())); + MachInst mach_inst = + TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> + (cache_pkt->getPtr<uint8_t>())); predecoder.setTC(cpu->thread[tid]->getTC()); predecoder.moreBytes(inst_pc, inst_pc, mach_inst); @@ -660,9 +1022,33 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i]: Processing cache access\n", tid, inst->seqNum); - - inst->completeAcc(pkt); - + + if (inst->splitInst) { + inst->splitFinishCnt++; + + if (inst->splitFinishCnt == 2) { + cache_req->memReq->setVirt(0/*inst->tid*/, + inst->getMemAddr(), + inst->splitTotalSize, + 0, + 0); + + Packet split_pkt(cache_req->memReq, cache_req->pktCmd, + Packet::Broadcast); + + + if (inst->isLoad()) { + split_pkt.dataStatic(inst->splitMemData); + } else { + split_pkt.dataStatic(&inst->storeData); + } + + inst->completeAcc(&split_pkt); + } + } else { + inst->completeAcc(pkt); + } + if (inst->isLoad()) { assert(cache_pkt->isRead()); @@ -696,6 +1082,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); + if (cache_req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n", tid); + + cpu->activateContext(tid); + + DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache" + "miss.\n", tid); + } + // Wake up the CPU (if it went to sleep and was waiting on this // completion event). cpu->wakeCPU(); @@ -717,22 +1113,14 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) void CacheUnit::recvRetry() { - DPRINTF(InOrderCachePort, "Retrying Request for [tid:%i] [sn:%i]\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); + DPRINTF(InOrderCachePort, "Unblocking Cache Port. \n"); + + assert(cachePortBlocked); - assert(retryPkt != NULL); - assert(cacheBlocked); - assert(cacheStatus == cacheWaitRetry); + // Clear the cache port for use again + cachePortBlocked = false; - if (cachePort->sendTiming(retryPkt)) { - cacheStatus = cacheWaitResponse; - retryPkt = NULL; - cacheBlocked = false; - } else { - DPRINTF(InOrderCachePort, - "Retry Request for [tid:%i] [sn:%i] failed\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); - } + cpu->wakeCPU(); } CacheUnitEvent::CacheUnitEvent() @@ -755,7 +1143,8 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqMap[slotIdx], tid); req_ptr->tlbStall = false; @@ -765,6 +1154,26 @@ CacheUnitEvent::process() } void +CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid) +{ + // If squashing due to memory stall, then we do NOT want to + // squash the instruction that caused the stall so we + // increment the sequence number here to prevent that. + // + // NOTE: This is only for the SwitchOnCacheMiss Model + // NOTE: If you have multiple outstanding misses from the same + // thread then you need to reevaluate this code + // NOTE: squash should originate from + // pipeline_stage.cc:processInstSchedule + DPRINTF(InOrderCachePort, "Squashing above [sn:%u]\n", + squash_seq_num + 1); + + squash(inst, stage_num, squash_seq_num + 1, tid); +} + + +void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { @@ -784,6 +1193,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, "[tid:%i] Squashing request from [sn:%i]\n", req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + if (req_ptr->isSquashed()) { + DPRINTF(AddrDep, "Request for [tid:%i] [sn:%i] already squashed, ignoring squash process.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum); + map_it++; + continue; + } + req_ptr->setSquashed(); req_ptr->getInst()->setSquashed(); @@ -798,7 +1215,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, int stall_stage = reqMap[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqMap[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { @@ -807,7 +1225,29 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); + + DPRINTF(InOrderCachePort, + "[tid:%i] Squashing request from [sn:%i]\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + } else { + DPRINTF(InOrderCachePort, + "[tid:%i] Request from [sn:%i] squashed, but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + DPRINTF(RefCount, + "[tid:%i] Request from [sn:%i] squashed (split:%i), but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum, + req_ptr->getInst()->splitInst); } + + if (req_ptr->getInst()->validMemAddr()) { + DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to remove addr. %08p dependencies.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum, + req_ptr->getInst()->getMemAddr()); + + removeAddrDependency(req_ptr->getInst()); + } + } map_it++; @@ -927,14 +1367,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -942,7 +1384,9 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_ template<> Fault -CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } + diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index c467e9771..9004f3b93 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -62,7 +62,6 @@ class CacheUnit : public Resource public: CacheUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~CacheUnit() {} enum Command { InitiateFetch, @@ -73,7 +72,11 @@ class CacheUnit : public Resource CompleteWriteData, Fetch, ReadData, - WriteData + WriteData, + InitSecondSplitRead, + InitSecondSplitWrite, + CompleteSecondSplitRead, + CompleteSecondSplitWrite }; public: @@ -119,24 +122,19 @@ class CacheUnit : public Resource virtual void recvRetry(); }; - enum CachePortStatus { - cacheWaitResponse, - cacheWaitRetry, - cacheAccessComplete - }; - void init(); virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned cmd); + ResReqPtr findRequest(DynInstPtr inst); + ResReqPtr findSplitRequest(DynInstPtr inst, int idx); + void requestAgain(DynInstPtr inst, bool &try_request); int getSlot(DynInstPtr inst); - void freeSlot(int slot_num); - /** Execute the function of this resource. The Default is action * is to do nothing. More specific models will derive from this * class and define their own execute function. @@ -146,6 +144,9 @@ class CacheUnit : public Resource void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); @@ -159,7 +160,7 @@ class CacheUnit : public Resource /** Returns a specific port. */ Port *getPort(const std::string &if_name, int idx); - + template <class T> Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags); @@ -173,7 +174,7 @@ class CacheUnit : public Resource /** Read/Write on behalf of an instruction. * curResSlot needs to be a valid value in instruction. */ - Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL); + Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL, CacheReqPtr split_req=NULL); void prefetch(DynInstPtr inst); @@ -181,23 +182,18 @@ class CacheUnit : public Resource uint64_t getMemData(Packet *packet); + void setAddrDependency(DynInstPtr inst); + void removeAddrDependency(DynInstPtr inst); + protected: /** Cache interface. */ CachePort *cachePort; - CachePortStatus cacheStatus; - - CacheReqPtr retryReq; + bool cachePortBlocked; - PacketPtr retryPkt; - - int retrySlot; - - bool cacheBlocked; - - std::vector<Addr> addrList; + std::vector<Addr> addrList[ThePipeline::MaxThreads]; - std::map<Addr, InstSeqNum> addrMap; + std::map<Addr, InstSeqNum> addrMap[ThePipeline::MaxThreads]; public: int cacheBlkSize; @@ -249,17 +245,18 @@ class CacheRequest : public ResourceRequest public: CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd, int req_size, - MemCmd::Command pkt_cmd, unsigned flags, int cpu_id) + MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx) : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd), pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), retryPkt(NULL), memAccComplete(false), memAccPending(false), - tlbStall(false) + tlbStall(false), splitAccess(false), splitAccessNum(-1), + split2ndAccess(false), instIdx(idx) { } virtual ~CacheRequest() { - if (reqData) { + if (reqData && !splitAccess) { delete [] reqData; } } @@ -273,6 +270,11 @@ class CacheRequest : public ResourceRequest memAccComplete = completed; } + bool is2ndSplit() + { + return split2ndAccess; + } + bool isMemAccComplete() { return memAccComplete; } void setMemAccPending(bool pending = true) { memAccPending = pending; } @@ -288,19 +290,27 @@ class CacheRequest : public ResourceRequest bool memAccComplete; bool memAccPending; bool tlbStall; + + bool splitAccess; + int splitAccessNum; + bool split2ndAccess; + int instIdx; + }; class CacheReqPacket : public Packet { public: CacheReqPacket(CacheRequest *_req, - Command _cmd, short _dest) - : Packet(_req->memReq, _cmd, _dest), cacheReq(_req) + Command _cmd, short _dest, int _idx = 0) + : Packet(_req->memReq, _cmd, _dest), cacheReq(_req), instIdx(_idx) { } CacheRequest *cacheReq; + int instIdx; + }; #endif //__CPU_CACHE_UNIT_HH__ diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 6c44e2456..429291231 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -54,6 +54,17 @@ ExecutionUnit::regStats() .name(name() + ".predictedNotTakenIncorrect") .desc("Number of Branches Incorrectly Predicted As Not Taken)."); + lastExecuteCycle = curTick; + + cyclesExecuted + .name(name() + ".cyclesExecuted") + .desc("Number of Cycles Execution Unit was used."); + + utilization + .name(name() + ".utilization") + .desc("Utilization of Execution Unit (cycles / totalCycles)."); + utilization = cyclesExecuted / cpu->numCycles; + Resource::regStats(); } @@ -75,6 +86,12 @@ ExecutionUnit::execute(int slot_num) { case ExecuteInst: { + if (curTick != lastExecuteCycle) { + lastExecuteCycle = curTick; + cyclesExecuted++; + } + + if (inst->isMemRef()) { panic("%s not configured to handle memory ops.\n", resName); } else if (inst->isControl()) { diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index 46691bbf2..b9cf1d428 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -52,7 +52,6 @@ class ExecutionUnit : public Resource { public: ExecutionUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ExecutionUnit() {} public: virtual void regStats(); @@ -71,6 +70,11 @@ class ExecutionUnit : public Resource { ///////////////////////////////////////////////////////////////// Stats::Scalar predictedTakenIncorrect; Stats::Scalar predictedNotTakenIncorrect; + + Stats::Scalar cyclesExecuted; + Tick lastExecuteCycle; + + Stats::Formula utilization; }; diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 1d0b92075..03663881c 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -54,6 +54,11 @@ FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width, } } +FetchSeqUnit::~FetchSeqUnit() +{ + delete [] resourceEvent; +} + void FetchSeqUnit::init() { @@ -336,3 +341,35 @@ FetchSeqUnit::deactivateThread(ThreadID tid) if (thread_it != cpu->fetchPriorityList.end()) cpu->fetchPriorityList.erase(thread_it); } + +void +FetchSeqUnit::suspendThread(ThreadID tid) +{ + deactivateThread(tid); +} + +void +FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + pcValid[tid] = true; + + if (cpu->thread[tid]->lastGradIsBranch) { + /** This function assumes that the instruction causing the context + * switch was right after the branch. Thus, if it's not, then + * we are updating incorrectly here + */ + assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC()); + + PC[tid] = cpu->thread[tid]->lastBranchNextNPC; + nextPC[tid] = PC[tid] + instSize; + nextNPC[tid] = nextPC[tid] + instSize; + } else { + PC[tid] = inst->readNextPC(); + nextPC[tid] = inst->readNextNPC(); + nextNPC[tid] = inst->readNextNPC() + instSize; + } + + DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch." + "Assigning PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid], + nextPC[tid], nextNPC[tid]); +} diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index a4495564b..289e150aa 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -54,12 +54,15 @@ class FetchSeqUnit : public Resource { public: FetchSeqUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~FetchSeqUnit() {} - + virtual ~FetchSeqUnit(); + virtual void init(); virtual void activateThread(ThreadID tid); virtual void deactivateThread(ThreadID tid); + virtual void suspendThread(ThreadID tid); virtual void execute(int slot_num); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Override default Resource squash sequence. This actually, * looks in the global communication buffer to get squash diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 2d7cd5c8c..2dad9889a 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -79,8 +79,6 @@ GraduationUnit::execute(int slot_num) "[tid:%i] Graduating instruction [sn:%i].\n", tid, inst->seqNum); - DPRINTF(RefCount, "Refcount = %i.\n", 0/*inst->curCount()*/); - // Release Non-Speculative "Block" on instructions that could not execute // because there was a non-speculative inst. active. // @TODO: Fix this functionality. Probably too conservative. diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh index ad222b119..7f0db98d0 100644 --- a/src/cpu/inorder/resources/graduation_unit.hh +++ b/src/cpu/inorder/resources/graduation_unit.hh @@ -63,8 +63,6 @@ class GraduationUnit : public Resource { bool *nonSpecInstActive[ThePipeline::MaxThreads]; InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads]; - - /** @todo: Add Resource Stats Here */ }; #endif //__CPU_INORDER_GRAD_UNIT_HH__ diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc index bb308b0ea..17b308db0 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -52,7 +52,8 @@ InstBuffer::regStats() { instsBypassed .name(name() + ".instsBypassed") - .desc("Number of Instructions Bypassed."); + .desc("Number of Instructions Bypassed.") + .prereq(instsBypassed); Resource::regStats(); } diff --git a/src/cpu/inorder/resources/mult_div_unit.hh b/src/cpu/inorder/resources/mult_div_unit.hh index d3dd0260d..19688b09f 100644 --- a/src/cpu/inorder/resources/mult_div_unit.hh +++ b/src/cpu/inorder/resources/mult_div_unit.hh @@ -57,7 +57,6 @@ class MultDivUnit : public Resource { public: MultDivUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~MultDivUnit() {} public: /** Override default Resource getSlot(). Will only getSlot if diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 36392d054..5fd6a4724 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -59,6 +59,17 @@ UseDefUnit::UseDefUnit(string res_name, int res_id, int res_width, } +void +UseDefUnit::regStats() +{ + uniqueRegsPerSwitch + .name(name() + ".uniqueRegsPerSwitch") + .desc("Number of Unique Registers Needed Per Context Switch") + .prereq(uniqueRegsPerSwitch); + + Resource::regStats(); +} + ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -75,7 +86,8 @@ UseDefUnit::findRequest(DynInstPtr inst) map<int, ResReqPtr>::iterator map_end = reqMap.end(); while (map_it != map_end) { - UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>((*map_it).second); + UseDefRequest* ud_req = + dynamic_cast<UseDefRequest*>((*map_it).second); assert(ud_req); if (ud_req && @@ -107,9 +119,9 @@ UseDefUnit::execute(int slot_idx) // in the pipeline then stall instructions here if (*nonSpecInstActive[tid] == true && seq_num > *nonSpecSeqNum[tid]) { - DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because there is " - "non-speculative instruction [sn:%i] has not graduated.\n", - tid, seq_num, *nonSpecSeqNum[tid]); + DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because" + "there is non-speculative instruction [sn:%i] has not " + "graduated.\n", tid, seq_num, *nonSpecSeqNum[tid]); return; } else if (inst->isNonSpeculative()) { *nonSpecInstActive[tid] = true; @@ -121,91 +133,134 @@ UseDefUnit::execute(int slot_idx) case ReadSrcReg: { int reg_idx = inst->_srcRegIdx[ud_idx]; - - DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source register idx %i (reg #%i).\n", + + DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source " + "register idx %i (reg #%i).\n", tid, ud_idx, reg_idx); - // Ask register dependency map if it is OK to read from Arch. Reg. File + // Ask register dependency map if it is OK to read from Arch. + // Reg. File if (regDepMap[tid]->canRead(reg_idx, inst)) { + + uniqueRegMap[reg_idx] = true; + if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readIntReg(reg_idx,inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i" + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readIntReg(reg_idx,inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readIntReg(reg_idx,inst->readTid())); + cpu->readIntReg(reg_idx, + inst->readTid())); } else if (reg_idx < Ctrl_Base_DepTag) { reg_idx -= FP_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i from Register File:%x (%08f).\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i" + "from Register File:%x (%08f).\n", tid, reg_idx, - cpu->readFloatRegBits(reg_idx, inst->readTid()), - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatRegBits(reg_idx, + inst->readTid()), + cpu->readFloatReg(reg_idx, + inst->readTid())); inst->setFloatSrc(ud_idx, - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatReg(reg_idx, + inst->readTid())); } else { reg_idx -= Ctrl_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readMiscReg(reg_idx, inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i " + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readMiscReg(reg_idx, + inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readMiscReg(reg_idx, inst->readTid())); + cpu->readMiscReg(reg_idx, + inst->readTid())); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" - " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to write\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because " + "of [sn:%i] hasnt read it's registers yet.\n", + tid, outReadSeqNum[tid]); + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to write\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { // Look for forwarding opportunities - DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, ud_idx, inst); + DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, + ud_idx, + inst); if (forward_inst) { if (inst->seqNum <= outReadSeqNum[tid]) { - int dest_reg_idx = forward_inst->getDestIdxNum(reg_idx); + int dest_reg_idx = + forward_inst->getDestIdxNum(reg_idx); if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } else if (reg_idx < Ctrl_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readFloatResult(dest_reg_idx) , + tid, + forward_inst->readFloatResult(dest_reg_idx), forward_inst->seqNum, inst->seqNum, ud_idx); inst->setFloatSrc(ud_idx, - forward_inst->readFloatResult(dest_reg_idx)); + forward_inst-> + readFloatResult(dest_reg_idx)); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read " + "because of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to forward\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to forward\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i is not ready to read.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i" + "is not ready to read.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read " + "register (idx=%i)\n", tid, reg_idx); outReadSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } } @@ -216,12 +271,14 @@ UseDefUnit::execute(int slot_idx) int reg_idx = inst->_destRegIdx[ud_idx]; if (regDepMap[tid]->canWrite(reg_idx, inst)) { - DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i & Attempting to write to Register File.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i &" + "Attempting to write to Register File.\n", tid, reg_idx); - + uniqueRegMap[reg_idx] = true; if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result " + "0x%x to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -236,33 +293,54 @@ UseDefUnit::execute(int slot_idx) reg_idx -= FP_Base_DepTag; - if (inst->resultType(ud_idx) == InOrderDynInst::Integer) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); - - cpu->setFloatRegBits(reg_idx, // Check for FloatRegBits Here + if (inst->resultType(ud_idx) == + InOrderDynInst::Integer) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); + + // Check for FloatRegBits Here + cpu->setFloatRegBits(reg_idx, inst->readIntResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Float) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Float) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Double) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); - - cpu->setFloatReg(reg_idx, // Check for FloatRegBits Here + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Double) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); + + // Check for FloatRegBits Here + cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); } else { - panic("Result Type Not Set For [sn:%i] %s.\n", inst->seqNum, inst->instName()); + panic("Result Type Not Set For [sn:%i] %s.\n", + inst->seqNum, inst->instName()); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x " + "to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -279,17 +357,23 @@ UseDefUnit::execute(int slot_idx) ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because " + "of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to read\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to read\n", tid, outReadSeqNum); + ud_req->done(false); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is not ready to write.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is " + "not ready to write.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write " + "register (idx=%i)\n", tid, reg_idx); outWriteSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } break; @@ -323,12 +407,16 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); - regDepMap[tid]->remove(req_ptr->getInst()); - int req_slot_num = req_ptr->getSlot(); - if (latency > 0) + if (latency > 0) { + assert(0); + unscheduleEvent(req_slot_num); + } + + // Mark request for later removal + cpu->reqRemoveList.push(req_ptr); // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); @@ -343,18 +431,29 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } if (outReadSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", + tid); outReadSeqNum[tid] = maxSeqNum; } else if (outReadSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read " + "Seq Num %i\n", tid, outReadSeqNum[tid]); } if (outWriteSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", + tid); outWriteSeqNum[tid] = maxSeqNum; } else if (outWriteSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write " + "Seq Num %i\n", tid, outWriteSeqNum[tid]); } } + +void +UseDefUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + uniqueRegsPerSwitch = uniqueRegMap.size(); + uniqueRegMap.clear(); +} diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index 6c76d8ab5..41d758dd7 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -68,8 +68,12 @@ class UseDefUnit : public Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + const InstSeqNum maxSeqNum; + void regStats(); + protected: RegDepMap *regDepMap[ThePipeline::MaxThreads]; @@ -84,14 +88,18 @@ class UseDefUnit : public Resource { InstSeqNum floatRegSize[ThePipeline::MaxThreads]; + Stats::Average uniqueRegsPerSwitch; + std::map<unsigned, bool> uniqueRegMap; + public: class UseDefRequest : public ResourceRequest { public: typedef ThePipeline::DynInstPtr DynInstPtr; public: - UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, int res_idx, - int slot_num, unsigned cmd, int use_def_idx) + UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, + int res_idx, int slot_num, unsigned cmd, + int use_def_idx) : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd), useDefIdx(use_def_idx) { } diff --git a/src/cpu/inorder/thread_context.cc b/src/cpu/inorder/thread_context.cc index 41d16b633..d2f511b9d 100644 --- a/src/cpu/inorder/thread_context.cc +++ b/src/cpu/inorder/thread_context.cc @@ -242,21 +242,21 @@ InOrderThreadContext::setRegOtherThread(int misc_reg, const MiscReg &val, void InOrderThreadContext::setPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting PC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting PC to %08p\n", thread->readTid(), val); cpu->setPC(val, thread->readTid()); } void InOrderThreadContext::setNextPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting NPC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting NPC to %08p\n", thread->readTid(), val); cpu->setNextPC(val, thread->readTid()); } void InOrderThreadContext::setNextNPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting NNPC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting NNPC to %08p\n", thread->readTid(), val); cpu->setNextNPC(val, thread->readTid()); } diff --git a/src/cpu/inorder/thread_context.hh b/src/cpu/inorder/thread_context.hh index 820f3077f..6dd5f192f 100644 --- a/src/cpu/inorder/thread_context.hh +++ b/src/cpu/inorder/thread_context.hh @@ -64,7 +64,6 @@ class InOrderThreadContext : public ThreadContext /** Pointer to the thread state that this TC corrseponds to. */ InOrderThreadState *thread; - /** Returns a pointer to the ITB. */ /** @TODO: PERF: Should we bind this to a pointer in constructor? */ TheISA::TLB *getITBPtr() { return cpu->getITBPtr(); } diff --git a/src/cpu/inorder/thread_state.hh b/src/cpu/inorder/thread_state.hh index 422df30aa..0a171a99f 100644 --- a/src/cpu/inorder/thread_state.hh +++ b/src/cpu/inorder/thread_state.hh @@ -79,14 +79,14 @@ class InOrderThreadState : public ThreadState { #if FULL_SYSTEM InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num) : ThreadState(reinterpret_cast<BaseCPU*>(_cpu), _thread_num), - cpu(_cpu), inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false) { } #else InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num, Process *_process) : ThreadState(reinterpret_cast<BaseCPU*>(_cpu), _thread_num, _process), - cpu(_cpu), inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false) { } #endif @@ -105,10 +105,15 @@ class InOrderThreadState : public ThreadState { /** Returns a pointer to the TC of this thread. */ ThreadContext *getTC() { return tc; } + /** Return the thread id */ int readTid() { return threadId(); } - /** Pointer to the last graduated instruction in the thread */ - //DynInstPtr lastGradInst; + + /** Is last instruction graduated a branch? */ + bool lastGradIsBranch; + Addr lastBranchPC; + Addr lastBranchNextPC; + Addr lastBranchNextNPC; }; #endif // __CPU_INORDER_THREAD_STATE_HH__ diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini index ca54a43c1..95f95a25b 100644 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini @@ -63,6 +63,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side @@ -78,7 +79,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -113,7 +113,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -148,7 +147,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout index b0f68db24..2c2b59190 100755 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout @@ -5,10 +5,10 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jul 4 2009 20:43:52 -M5 revision 20167772fb15 6281 default tip -M5 started Jul 4 2009 20:43:52 -M5 executing on tater +M5 compiled Jan 30 2010 14:58:44 +M5 revision 4b602939e245 6707 default inorder_vortex_alpha qtip tip +M5 started Jan 30 2010 14:58:45 +M5 executing on zooks command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt index 2791e3ab6..f03c66752 100644 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt @@ -1,88 +1,87 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 66323 # Simulator instruction rate (inst/s) -host_mem_usage 296324 # Number of bytes of host memory used -host_seconds 1331.98 # Real time elapsed on the host -host_tick_rate 81990812 # Simulator tick rate (ticks/s) +host_inst_rate 51950 # Simulator instruction rate (inst/s) +host_mem_usage 166756 # Number of bytes of host memory used +host_seconds 1700.48 # Real time elapsed on the host +host_tick_rate 63220517 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 88340673 # Number of instructions simulated -sim_seconds 0.109210 # Number of seconds simulated -sim_ticks 109210014500 # Number of ticks simulated +sim_seconds 0.107505 # Number of seconds simulated +sim_ticks 107505320500 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 35224018 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.instReqsProcessed 88340674 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 10443271 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 3311206 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 88340674 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 88523379 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.predictedNotTaken 10466150 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 3314731 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 88523379 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 53070972 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 53075554 # Number of Instructions Requests that completed in this resource. -system.cpu.Execution-Unit.predictedNotTakenIncorrect 4515835 # Number of Branches Incorrectly Predicted As Not Taken). -system.cpu.Execution-Unit.predictedTakenIncorrect 1659774 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 184507615 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.predictedNotTakenIncorrect 4515839 # Number of Branches Incorrectly Predicted As Not Taken). +system.cpu.Execution-Unit.predictedTakenIncorrect 1659770 # Number of Branches Incorrectly Predicted As Taken. +system.cpu.Execution-Unit.utilization 0.246830 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 186350086 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 88340673 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 82202 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 41101 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 158796488 # Number of Instructions Requests that completed in this resource. +system.cpu.RegFile-Manager.instReqsProcessed 165783241 # Number of Instructions Requests that completed in this resource. +system.cpu.activity 86.931340 # Percentage of cycles cpu is active system.cpu.committedInsts 88340673 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 88340673 # Number of Instructions Simulated (Total) -system.cpu.cpi 2.472474 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 2.472474 # CPI: Total CPI of All Threads +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 2.433881 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 2.433881 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 20276638 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 38181.240129 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 35069.166968 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 20215854 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 2320808500 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.002998 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 60784 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 18 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 2131013000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_avg_miss_latency 38009.956226 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 34917.034197 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 20215872 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 2309713000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.002997 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 60766 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 2121768500 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.002997 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 60766 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 14613377 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56049.825426 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53049.825426 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56040.926479 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53040.926479 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 14463584 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 8395871500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 8394538500 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.010250 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 149793 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 7946492500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 7945159500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.010250 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 149793 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 169.741509 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 169.741568 # Average number of references to valid blocks. system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 34890015 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 50891.977756 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 47860.720748 # average overall mshr miss latency -system.cpu.dcache.demand_hits 34679438 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 10716680000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_avg_miss_latency 50837.302134 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 47810.485422 # average overall mshr miss latency +system.cpu.dcache.demand_hits 34679456 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 10704251500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.006035 # miss rate for demand accesses -system.cpu.dcache.demand_misses 210577 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 18 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 10077505500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_misses 210559 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 10066928000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.006035 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 210559 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 34890015 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 50891.977756 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 47860.720748 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 50837.302134 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 47810.485422 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 34679438 # number of overall hits -system.cpu.dcache.overall_miss_latency 10716680000 # number of overall miss cycles +system.cpu.dcache.overall_hits 34679456 # number of overall hits +system.cpu.dcache.overall_miss_latency 10704251500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.006035 # miss rate for overall accesses -system.cpu.dcache.overall_misses 210577 # number of overall misses -system.cpu.dcache.overall_mshr_hits 18 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 10077505500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_misses 210559 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 10066928000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.006035 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 210559 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -90,9 +89,9 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 200248 # number of replacements system.cpu.dcache.sampled_refs 204344 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 4077.182458 # Cycle average of tags in use -system.cpu.dcache.total_refs 34685659 # Total number of references to valid blocks. -system.cpu.dcache.warmup_cycle 848449000 # Cycle when the warmup percentage was hit. +system.cpu.dcache.tagsinuse 4076.864414 # Cycle average of tags in use +system.cpu.dcache.total_refs 34685671 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 848885000 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 147714 # number of writebacks system.cpu.dcache_port.instReqsProcessed 35224018 # Number of Instructions Requests that completed in this resource. system.cpu.dtb.data_accesses 34987415 # DTB accesses @@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses 14620629 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 14613377 # DTB write hits system.cpu.dtb.write_misses 7252 # DTB write misses -system.cpu.icache.ReadReq_accesses 96166938 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 19084.949617 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 15849.033723 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 96087744 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1511413500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_accesses 97826463 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 19024.038820 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 15840.795350 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 97745885 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1532919000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.000824 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 79194 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 1266 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1235083500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.000810 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 80578 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 2650 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 1234441500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.000797 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 77928 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 1233.032338 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 1254.310197 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 96166938 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 19084.949617 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 15849.033723 # average overall mshr miss latency -system.cpu.icache.demand_hits 96087744 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1511413500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_accesses 97826463 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 19024.038820 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 15840.795350 # average overall mshr miss latency +system.cpu.icache.demand_hits 97745885 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1532919000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.000824 # miss rate for demand accesses -system.cpu.icache.demand_misses 79194 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 1266 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1235083500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.000810 # mshr miss rate for demand accesses +system.cpu.icache.demand_misses 80578 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 2650 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 1234441500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.000797 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 77928 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 96166938 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 19084.949617 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 15849.033723 # average overall mshr miss latency +system.cpu.icache.overall_accesses 97826463 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 19024.038820 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 15840.795350 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 96087744 # number of overall hits -system.cpu.icache.overall_miss_latency 1511413500 # number of overall miss cycles +system.cpu.icache.overall_hits 97745885 # number of overall hits +system.cpu.icache.overall_miss_latency 1532919000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.000824 # miss rate for overall accesses -system.cpu.icache.overall_misses 79194 # number of overall misses -system.cpu.icache.overall_mshr_hits 1266 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1235083500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.000810 # mshr miss rate for overall accesses +system.cpu.icache.overall_misses 80578 # number of overall misses +system.cpu.icache.overall_mshr_hits 2650 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 1234441500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.000797 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 77928 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 75882 # number of replacements system.cpu.icache.sampled_refs 77928 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 1874.320715 # Cycle average of tags in use -system.cpu.icache.total_refs 96087744 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 1873.747475 # Cycle average of tags in use +system.cpu.icache.total_refs 97745885 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 96166940 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.404453 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.404453 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 97826706 # Number of Instructions Requests that completed in this resource. +system.cpu.idleCycles 28099010 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.410867 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.410867 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 96170872 # ITB accesses +system.cpu.itb.fetch_accesses 97830397 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 96166938 # ITB hits +system.cpu.itb.fetch_hits 97826463 # ITB hits system.cpu.itb.fetch_misses 3934 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -185,31 +185,31 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 143578 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52038.849963 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.083578 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 7471634000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_avg_miss_latency 52034.768558 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.222875 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 7471048000 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 143578 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 5743132000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_latency 5743152000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 143578 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 138694 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52316.057051 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40003.485162 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52087.681159 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40004.623879 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 95224 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 2274179000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 2264251500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.313424 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 43470 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1738951500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 1739001000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.313424 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 43470 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 6215 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 51993.805310 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000.884956 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 323141500 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_avg_miss_latency 51862.831858 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40002.815768 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 322327500 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 6215 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 248605500 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 248617500 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_mshr_misses 6215 # number of UpgradeReq MSHR misses system.cpu.l2cache.Writeback_accesses 147714 # number of Writeback accesses(hits+misses) @@ -223,29 +223,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 282272 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52103.272957 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40000.874107 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52047.065459 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40001.245670 # average overall mshr miss latency system.cpu.l2cache.demand_hits 95224 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 9745813000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 9735299500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.662652 # miss rate for demand accesses system.cpu.l2cache.demand_misses 187048 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 7482083500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 7482153000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.662652 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 187048 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 282272 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52103.272957 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40000.874107 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52047.065459 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40001.245670 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 95224 # number of overall hits -system.cpu.l2cache.overall_miss_latency 9745813000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 9735299500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.662652 # miss rate for overall accesses system.cpu.l2cache.overall_misses 187048 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 7482083500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 7482153000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.662652 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 187048 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -253,16 +253,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 147733 # number of replacements system.cpu.l2cache.sampled_refs 172939 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 18262.944082 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 18257.402494 # Cycle average of tags in use system.cpu.l2cache.total_refs 110306 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 120636 # number of writebacks -system.cpu.numCycles 218420030 # number of cpu cycles simulated +system.cpu.numCycles 215010642 # number of cpu cycles simulated +system.cpu.runCycles 186911632 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 218420030 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 117180245 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 97830397 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 45.500258 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 126487263 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 88523379 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 41.171627 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 125185318 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 89825324 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 41.777153 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 179779372 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 35231270 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 16.385826 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 126669969 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 88340673 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 41.086651 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 215010642 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 4583 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini index f04bd741b..ca2d0ba7e 100644 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini @@ -63,6 +63,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side @@ -78,7 +79,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -113,7 +113,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -148,7 +147,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout index 4a762fa1c..309f6bf40 100755 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout @@ -5,10 +5,10 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jul 4 2009 20:43:52 -M5 revision 20167772fb15 6281 default tip -M5 started Jul 4 2009 20:43:52 -M5 executing on tater +M5 compiled Jan 29 2010 09:29:58 +M5 revision a196f8cf520a 6706 default qtip tip inorder_twolf_alpha +M5 started Jan 29 2010 09:31:14 +M5 executing on zooks command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sav Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sv2 diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt index c58b2a060..0453fd079 100644 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt @@ -1,88 +1,87 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 69440 # Simulator instruction rate (inst/s) -host_mem_usage 210892 # Number of bytes of host memory used -host_seconds 1323.48 # Real time elapsed on the host -host_tick_rate 76516395 # Simulator tick rate (ticks/s) +host_inst_rate 55182 # Simulator instruction rate (inst/s) +host_mem_usage 156168 # Number of bytes of host memory used +host_seconds 1665.47 # Real time elapsed on the host +host_tick_rate 59164617 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 91903056 # Number of instructions simulated -sim_seconds 0.101268 # Number of seconds simulated -sim_ticks 101268061000 # Number of ticks simulated +sim_seconds 0.098537 # Number of seconds simulated +sim_ticks 98536744000 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 26537108 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.instReqsProcessed 91903057 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 8198984 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 2041701 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 91903057 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 92657148 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.predictedNotTaken 8232810 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 2041716 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 92657148 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 64907308 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 64907696 # Number of Instructions Requests that completed in this resource. system.cpu.Execution-Unit.predictedNotTakenIncorrect 3739118 # Number of Branches Incorrectly Predicted As Not Taken). system.cpu.Execution-Unit.predictedTakenIncorrect 1029596 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 189586934 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.utilization 0.329356 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 191370621 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 91903056 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 916504 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 458252 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 188816950 # Number of Instructions Requests that completed in this resource. +system.cpu.RegFile-Manager.instReqsProcessed 196152134 # Number of Instructions Requests that completed in this resource. +system.cpu.activity 96.743392 # Percentage of cycles cpu is active system.cpu.committedInsts 91903056 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 91903056 # Number of Instructions Simulated (Total) -system.cpu.cpi 2.203802 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 2.203802 # CPI: Total CPI of All Threads +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 2.144363 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 2.144363 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 19996198 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 51623.700624 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48550.526316 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 19995717 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 24831000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_avg_miss_latency 51569.473684 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48547.368421 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 19995723 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 24495500 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.000024 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 481 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 6 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 23061500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_misses 475 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 23060000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.000024 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 475 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 6501103 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56415.277031 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53415.277031 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56295.857988 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53295.857988 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 6499244 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 104876000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 104654000 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.000286 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 1859 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 99299000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 99077000 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.000286 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 1859 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 11918.612686 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 11918.613585 # Average number of references to valid blocks. system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 26497301 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 55430.341880 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 52425.235647 # average overall mshr miss latency -system.cpu.dcache.demand_hits 26494961 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 129707000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_avg_miss_latency 55333.976007 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 52329.477292 # average overall mshr miss latency +system.cpu.dcache.demand_hits 26494967 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 129149500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.000088 # miss rate for demand accesses -system.cpu.dcache.demand_misses 2340 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 6 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 122360500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_misses 2334 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 122137000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.000088 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 2334 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 26497301 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 55430.341880 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 52425.235647 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 55333.976007 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 52329.477292 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 26494961 # number of overall hits -system.cpu.dcache.overall_miss_latency 129707000 # number of overall miss cycles +system.cpu.dcache.overall_hits 26494967 # number of overall hits +system.cpu.dcache.overall_miss_latency 129149500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.000088 # miss rate for overall accesses -system.cpu.dcache.overall_misses 2340 # number of overall misses -system.cpu.dcache.overall_mshr_hits 6 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 122360500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_misses 2334 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 122137000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.000088 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 2334 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -90,8 +89,8 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 157 # number of replacements system.cpu.dcache.sampled_refs 2223 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 1441.819572 # Cycle average of tags in use -system.cpu.dcache.total_refs 26495076 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 1441.684134 # Cycle average of tags in use +system.cpu.dcache.total_refs 26495078 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 104 # number of writebacks system.cpu.dcache_port.instReqsProcessed 26537108 # Number of Instructions Requests that completed in this resource. @@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses 6501126 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 6501103 # DTB write hits system.cpu.dtb.write_misses 23 # DTB write misses -system.cpu.icache.ReadReq_accesses 97683877 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 27282.787360 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 24026.266636 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 97675238 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 235696000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_accesses 98713473 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 27258.057090 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 23994.339402 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 98704785 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 236818000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.000088 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 8639 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 73 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 205809000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.000088 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 8566 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_misses 8688 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 120 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 205583500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.000087 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 8568 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked -system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 11402.666122 # Average number of references to valid blocks. +system.cpu.icache.avg_blocked_cycles::no_targets 1000 # average number of cycles each access was blocked +system.cpu.icache.avg_refs 11520.166317 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked::no_targets 1 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles::no_targets 1000 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 97683877 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 27282.787360 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 24026.266636 # average overall mshr miss latency -system.cpu.icache.demand_hits 97675238 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 235696000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_accesses 98713473 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 27258.057090 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 23994.339402 # average overall mshr miss latency +system.cpu.icache.demand_hits 98704785 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 236818000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.000088 # miss rate for demand accesses -system.cpu.icache.demand_misses 8639 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 73 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 205809000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.000088 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 8566 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_misses 8688 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 120 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 205583500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.000087 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 8568 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 97683877 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 27282.787360 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 24026.266636 # average overall mshr miss latency +system.cpu.icache.overall_accesses 98713473 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 27258.057090 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 23994.339402 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 97675238 # number of overall hits -system.cpu.icache.overall_miss_latency 235696000 # number of overall miss cycles +system.cpu.icache.overall_hits 98704785 # number of overall hits +system.cpu.icache.overall_miss_latency 236818000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.000088 # miss rate for overall accesses -system.cpu.icache.overall_misses 8639 # number of overall misses -system.cpu.icache.overall_mshr_hits 73 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 205809000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.000088 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 8566 # number of overall MSHR misses +system.cpu.icache.overall_misses 8688 # number of overall misses +system.cpu.icache.overall_mshr_hits 120 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 205583500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.000087 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 8568 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses -system.cpu.icache.replacements 6732 # number of replacements -system.cpu.icache.sampled_refs 8566 # Sample count of references to valid blocks. +system.cpu.icache.replacements 6734 # number of replacements +system.cpu.icache.sampled_refs 8568 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 1428.614683 # Cycle average of tags in use -system.cpu.icache.total_refs 97675238 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 1428.229557 # Cycle average of tags in use +system.cpu.icache.total_refs 98704785 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 97683876 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.453761 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.453761 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 98713472 # Number of Instructions Requests that completed in this resource. +system.cpu.idleCycles 6417911 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.466339 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.466339 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 97683924 # ITB accesses +system.cpu.itb.fetch_accesses 98713520 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 97683877 # ITB hits +system.cpu.itb.fetch_hits 98713473 # ITB hits system.cpu.itb.fetch_misses 47 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -185,84 +185,100 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 1748 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52413.043478 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40003.432494 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 91618000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_avg_miss_latency 52296.624714 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40005.720824 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 91414500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 1748 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 69926000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_latency 69930000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 1748 # number of ReadExReq MSHR misses -system.cpu.l2cache.ReadReq_accesses 9041 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52240.613777 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40013.548808 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_hits 5978 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 160013000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 0.338790 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_accesses 9043 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 52161.443030 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40020.078355 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 5980 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 159770500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.338715 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 3063 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 122561500 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.338790 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_miss_latency 122581500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.338715 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 3063 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 111 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52414.414414 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 5818000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52216.216216 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40009.009009 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 5796000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 111 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 4440000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 4441000 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_mshr_misses 111 # number of UpgradeReq MSHR misses system.cpu.l2cache.Writeback_accesses 104 # number of Writeback accesses(hits+misses) system.cpu.l2cache.Writeback_hits 104 # number of Writeback hits system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 1.968317 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 1.968977 # Average number of references to valid blocks. system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 10789 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52303.263355 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40009.873207 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 5978 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 251631000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 0.445917 # miss rate for demand accesses +system.cpu.l2cache.demand_accesses 10791 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 52210.559135 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40014.861775 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 5980 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 251185000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.445834 # miss rate for demand accesses system.cpu.l2cache.demand_misses 4811 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 192487500 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 0.445917 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_latency 192511500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.445834 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 4811 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 10789 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52303.263355 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40009.873207 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 10791 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 52210.559135 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40014.861775 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 5978 # number of overall hits -system.cpu.l2cache.overall_miss_latency 251631000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 0.445917 # miss rate for overall accesses +system.cpu.l2cache.overall_hits 5980 # number of overall hits +system.cpu.l2cache.overall_miss_latency 251185000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.445834 # miss rate for overall accesses system.cpu.l2cache.overall_misses 4811 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 192487500 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 0.445917 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_latency 192511500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.445834 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 4811 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 3030 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 2039.371088 # Cycle average of tags in use -system.cpu.l2cache.total_refs 5964 # Total number of references to valid blocks. +system.cpu.l2cache.tagsinuse 2038.814805 # Cycle average of tags in use +system.cpu.l2cache.total_refs 5966 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 202536123 # number of cpu cycles simulated +system.cpu.numCycles 197073489 # number of cpu cycles simulated +system.cpu.runCycles 190655578 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 202536123 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 98359969 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 98713520 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 50.089700 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 104416341 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 92657148 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 47.016546 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 103581004 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 93492485 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 47.440417 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 170536358 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 26537131 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 13.465602 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 105170433 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 91903056 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 46.633901 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 197073489 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 389 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini index b30560264..5ab5381fc 100644 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini @@ -63,6 +63,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side @@ -78,7 +79,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -113,7 +113,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -148,7 +147,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 @@ -190,7 +188,7 @@ egid=100 env= errout=cerr euid=100 -executable=tests/test-progs/hello/bin/alpha/linux/hello +executable=/dist/m5/regression/test-progs/hello/bin/alpha/linux/hello gid=100 input=cin max_stack_size=67108864 diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout index 18efdaa9e..4ad6292c5 100755 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 12 2009 11:18:39 -M5 revision 21550d38f156 6195 default qtip tip inorder-hello-regress -M5 started May 12 2009 11:18:40 +M5 compiled Jan 29 2010 09:13:03 +M5 revision 23ae96d82d21+ 6704+ default qtip tip inorder_hello_alpha +M5 started Jan 29 2010 09:13:04 M5 executing on zooks command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello world! -Exiting @ tick 31646000 because target called exit() +Exiting @ tick 31286000 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt index a88b80594..b9a12afbb 100644 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt @@ -1,53 +1,53 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 23793 # Simulator instruction rate (inst/s) -host_mem_usage 152032 # Number of bytes of host memory used -host_seconds 0.27 # Real time elapsed on the host -host_tick_rate 117464960 # Simulator tick rate (ticks/s) +host_inst_rate 23048 # Simulator instruction rate (inst/s) +host_mem_usage 153228 # Number of bytes of host memory used +host_seconds 0.28 # Real time elapsed on the host +host_tick_rate 112412599 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 6404 # Number of instructions simulated -sim_seconds 0.000032 # Number of seconds simulated -sim_ticks 31646000 # Number of ticks simulated +sim_seconds 0.000031 # Number of seconds simulated +sim_ticks 31286000 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 2050 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.instReqsProcessed 6405 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 909 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 142 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 6405 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 6581 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.predictedNotTaken 924 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 143 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 6581 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 4340 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 4354 # Number of Instructions Requests that completed in this resource. -system.cpu.Execution-Unit.predictedNotTakenIncorrect 607 # Number of Branches Incorrectly Predicted As Not Taken). -system.cpu.Execution-Unit.predictedTakenIncorrect 124 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 13560 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.predictedNotTakenIncorrect 608 # Number of Branches Incorrectly Predicted As Not Taken). +system.cpu.Execution-Unit.predictedTakenIncorrect 123 # Number of Branches Incorrectly Predicted As Taken. +system.cpu.Execution-Unit.utilization 0.069359 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 13858 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 6404 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 2 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 1 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 12884 # Number of Instructions Requests that completed in this resource. +system.cpu.RegFile-Manager.instReqsProcessed 19961 # Number of Instructions Requests that completed in this resource. +system.cpu.activity 22.407428 # Percentage of cycles cpu is active system.cpu.committedInsts 6404 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 6404 # Number of Instructions Simulated (Total) -system.cpu.cpi 9.883354 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 9.883354 # CPI: Total CPI of All Threads +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 9.770924 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 9.770924 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 1185 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56352.631579 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53352.631579 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 56347.368421 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53347.368421 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 1090 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 5353500 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 5353000 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.080169 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 95 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 5068500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 5068000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.080169 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 95 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 865 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56419.540230 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53419.540230 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56074.712644 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53074.712644 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 778 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 4908500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 4878500 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.100578 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 87 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 4647500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 4617500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.100578 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 87 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -59,29 +59,29 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 # system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 2050 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56384.615385 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53384.615385 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 56217.032967 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53217.032967 # average overall mshr miss latency system.cpu.dcache.demand_hits 1868 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 10262000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 10231500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.088780 # miss rate for demand accesses system.cpu.dcache.demand_misses 182 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 9716000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 9685500 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.088780 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 182 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 2050 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56384.615385 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53384.615385 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 56217.032967 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53217.032967 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 1868 # number of overall hits -system.cpu.dcache.overall_miss_latency 10262000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 10231500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.088780 # miss rate for overall accesses system.cpu.dcache.overall_misses 182 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 9716000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 9685500 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.088780 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 182 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -89,7 +89,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 168 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 104.325446 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 103.689640 # Cycle average of tags in use system.cpu.dcache.total_refs 1882 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks @@ -110,70 +110,71 @@ system.cpu.dtb.write_accesses 868 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 865 # DTB write hits system.cpu.dtb.write_misses 3 # DTB write misses -system.cpu.icache.ReadReq_accesses 7155 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55763.605442 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 52949.122807 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 6861 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 16394500 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.041090 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 294 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 9 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 15090500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.039832 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 7277 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 55521.594684 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 52863.157895 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 6976 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 16712000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.041363 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 301 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 16 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 15066000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.039164 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 285 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 24.158451 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 24.563380 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 7155 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55763.605442 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 52949.122807 # average overall mshr miss latency -system.cpu.icache.demand_hits 6861 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 16394500 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.041090 # miss rate for demand accesses -system.cpu.icache.demand_misses 294 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 9 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 15090500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.039832 # mshr miss rate for demand accesses +system.cpu.icache.demand_accesses 7277 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 55521.594684 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 52863.157895 # average overall mshr miss latency +system.cpu.icache.demand_hits 6976 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 16712000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.041363 # miss rate for demand accesses +system.cpu.icache.demand_misses 301 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 16 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 15066000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.039164 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 285 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 7155 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55763.605442 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 52949.122807 # average overall mshr miss latency +system.cpu.icache.overall_accesses 7277 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 55521.594684 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 52863.157895 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 6861 # number of overall hits -system.cpu.icache.overall_miss_latency 16394500 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.041090 # miss rate for overall accesses -system.cpu.icache.overall_misses 294 # number of overall misses -system.cpu.icache.overall_mshr_hits 9 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 15090500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.039832 # mshr miss rate for overall accesses +system.cpu.icache.overall_hits 6976 # number of overall hits +system.cpu.icache.overall_miss_latency 16712000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.041363 # miss rate for overall accesses +system.cpu.icache.overall_misses 301 # number of overall misses +system.cpu.icache.overall_mshr_hits 16 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 15066000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.039164 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 285 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 284 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 131.383181 # Cycle average of tags in use -system.cpu.icache.total_refs 6861 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 130.373495 # Cycle average of tags in use +system.cpu.icache.total_refs 6976 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 7153 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.101180 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.101180 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 7275 # Number of Instructions Requests that completed in this resource. +system.cpu.idleCycles 48552 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.102344 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.102344 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 7172 # ITB accesses +system.cpu.itb.fetch_accesses 7294 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 7155 # ITB hits +system.cpu.itb.fetch_hits 7277 # ITB hits system.cpu.itb.fetch_misses 17 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -184,28 +185,28 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 73 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52424.657534 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_miss_latency 52075.342466 # average ReadExReq miss latency system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40013.698630 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 3827000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 3801500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 73 # number of ReadExReq misses system.cpu.l2cache.ReadExReq_mshr_miss_latency 2921000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 73 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 380 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52118.733509 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39944.591029 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52068.601583 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39945.910290 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 19753000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 19734000 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.997368 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 379 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 15139000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 15139500 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.997368 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 379 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52357.142857 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52071.428571 # average UpgradeReq miss latency system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 733000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_latency 729000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses system.cpu.l2cache.UpgradeReq_mshr_miss_latency 560000 # number of UpgradeReq MSHR miss cycles @@ -220,29 +221,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 453 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52168.141593 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 39955.752212 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52069.690265 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 39956.858407 # average overall mshr miss latency system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 23580000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 23535500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.997792 # miss rate for demand accesses system.cpu.l2cache.demand_misses 452 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 18060000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 18060500 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.997792 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 452 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 453 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52168.141593 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 39955.752212 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52069.690265 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 39956.858407 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 1 # number of overall hits -system.cpu.l2cache.overall_miss_latency 23580000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 23535500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.997792 # miss rate for overall accesses system.cpu.l2cache.overall_misses 452 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 18060000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 18060500 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.997792 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 452 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -250,16 +251,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 364 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 182.840902 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 181.532273 # Cycle average of tags in use system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 63293 # number of cpu cycles simulated +system.cpu.numCycles 62573 # number of cpu cycles simulated +system.cpu.runCycles 14021 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 63293 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 55279 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 7294 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 11.656785 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 55992 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 6581 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 10.517316 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 56103 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 6470 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 10.339923 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 60520 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 2053 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 3.280968 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 56169 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 6404 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 10.234446 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 62573 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 17 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini index 78a86bf82..8d2a24508 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini @@ -117,6 +117,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout index 581c531f6..ce217f494 100755 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jan 2 2010 07:01:31 -M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch -M5 started Jan 2 2010 07:03:09 -M5 executing on fajita -command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing +M5 compiled Jan 31 2010 17:08:14 +M5 revision 01508015f86b 6964 default qtip tip inorder_hello_mips +M5 started Jan 31 2010 17:08:15 +M5 executing on zooks +command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! -Exiting @ tick 29940500 because target called exit() +Exiting @ tick 29206500 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt index d55c721ca..df2d539f4 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt @@ -1,96 +1,96 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 10400 # Simulator instruction rate (inst/s) -host_mem_usage 205896 # Number of bytes of host memory used -host_seconds 0.56 # Real time elapsed on the host -host_tick_rate 53415864 # Simulator tick rate (ticks/s) +host_inst_rate 19644 # Simulator instruction rate (inst/s) +host_mem_usage 155856 # Number of bytes of host memory used +host_seconds 0.30 # Real time elapsed on the host +host_tick_rate 98307932 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5827 # Number of instructions simulated -sim_seconds 0.000030 # Number of seconds simulated -sim_ticks 29940500 # Number of ticks simulated +sim_seconds 0.000029 # Number of seconds simulated +sim_ticks 29206500 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 2090 # Number of Instructions Requests that completed in this resource. system.cpu.Branch-Predictor.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. system.cpu.Branch-Predictor.predictedNotTaken 826 # Number of Branches Predicted As Not Taken (False). system.cpu.Branch-Predictor.predictedTaken 90 # Number of Branches Predicted As Taken (True). system.cpu.Decode-Unit.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 3725 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 3734 # Number of Instructions Requests that completed in this resource. system.cpu.Execution-Unit.predictedNotTakenIncorrect 541 # Number of Branches Incorrectly Predicted As Not Taken). system.cpu.Execution-Unit.predictedTakenIncorrect 35 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 11657 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.utilization 0.063769 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 11702 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 5827 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 1 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 8 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 3 # Number of Multiply Requests Processed. system.cpu.RegFile-Manager.instReqsProcessed 10713 # Number of Instructions Requests that completed in this resource. +system.cpu.activity 20.277673 # Percentage of cycles cpu is active system.cpu.committedInsts 5827 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 5827 # Number of Instructions Simulated (Total) -system.cpu.cpi 10.276643 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 10.276643 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1165 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56201.149425 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53201.149425 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1078 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 4889500 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.074678 # miss rate for ReadReq accesses +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 10.024713 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 10.024713 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1164 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 56229.885057 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53229.885057 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1077 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 4892000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.074742 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 87 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 4628500 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.074678 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_latency 4631000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.074742 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 87 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56554.687500 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53554.687500 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56265.625000 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53265.625000 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 861 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 3619500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 3601000 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.069189 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 64 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 3427500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 3409000 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.069189 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 14.144928 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 14.137681 # Average number of references to valid blocks. system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2090 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56350.993377 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1939 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 8509000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.072249 # miss rate for demand accesses +system.cpu.dcache.demand_accesses 2089 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 56245.033113 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53245.033113 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1938 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 8493000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.072283 # miss rate for demand accesses system.cpu.dcache.demand_misses 151 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 8056000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.072249 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_latency 8040000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.072283 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 151 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2090 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56350.993377 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2089 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 56245.033113 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53245.033113 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1939 # number of overall hits -system.cpu.dcache.overall_miss_latency 8509000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.072249 # miss rate for overall accesses +system.cpu.dcache.overall_hits 1938 # number of overall hits +system.cpu.dcache.overall_miss_latency 8493000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.072283 # miss rate for overall accesses system.cpu.dcache.overall_misses 151 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 8056000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.072249 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_latency 8040000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.072283 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 151 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 88.212490 # Cycle average of tags in use -system.cpu.dcache.total_refs 1952 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 88.491296 # Cycle average of tags in use +system.cpu.dcache.total_refs 1951 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.dcache_port.instReqsProcessed 2089 # Number of Instructions Requests that completed in this resource. @@ -103,62 +103,63 @@ system.cpu.dtb.read_misses 0 # DT system.cpu.dtb.write_accesses 0 # DTB write accesses system.cpu.dtb.write_hits 0 # DTB write hits system.cpu.dtb.write_misses 0 # DTB write misses -system.cpu.icache.ReadReq_accesses 5829 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55765.676568 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 52765.676568 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 5526 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 16897000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.051981 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 5874 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 55801.980198 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 52801.980198 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 5571 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 16908000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.051583 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 303 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 15988000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.051981 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_miss_latency 15999000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.051583 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 303 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 18.237624 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 18.386139 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 5829 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55765.676568 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency -system.cpu.icache.demand_hits 5526 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 16897000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.051981 # miss rate for demand accesses +system.cpu.icache.demand_accesses 5874 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 55801.980198 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 52801.980198 # average overall mshr miss latency +system.cpu.icache.demand_hits 5571 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 16908000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.051583 # miss rate for demand accesses system.cpu.icache.demand_misses 303 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 15988000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.051981 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_latency 15999000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.051583 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 303 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 5829 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55765.676568 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency +system.cpu.icache.overall_accesses 5874 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 55801.980198 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 52801.980198 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 5526 # number of overall hits -system.cpu.icache.overall_miss_latency 16897000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.051981 # miss rate for overall accesses +system.cpu.icache.overall_hits 5571 # number of overall hits +system.cpu.icache.overall_miss_latency 16908000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.051583 # miss rate for overall accesses system.cpu.icache.overall_misses 303 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 15988000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.051981 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_latency 15999000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.051583 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 303 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 13 # number of replacements system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 134.267603 # Cycle average of tags in use -system.cpu.icache.total_refs 5526 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 135.362853 # Cycle average of tags in use +system.cpu.icache.total_refs 5571 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.097308 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.097308 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 5873 # Number of Instructions Requests that completed in this resource. +system.cpu.idleCycles 46569 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.099753 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.099753 # IPC: Total IPC of All Threads system.cpu.itb.accesses 0 # DTB accesses system.cpu.itb.hits 0 # DTB hits system.cpu.itb.misses 0 # DTB misses @@ -169,31 +170,31 @@ system.cpu.itb.write_accesses 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 51 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52500 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_miss_latency 52264.705882 # average ReadExReq miss latency system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40098.039216 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 2677500 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 2665500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 51 # number of ReadExReq misses system.cpu.l2cache.ReadExReq_mshr_miss_latency 2045000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 51 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 390 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52052.835052 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40023.195876 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52091.494845 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40048.969072 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 20196500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 20211500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.994872 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 388 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 15529000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 15539000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994872 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 388 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 13 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52538.461538 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40076.923077 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 683000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52192.307692 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40153.846154 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 678500 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 13 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 521000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 522000 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_mshr_misses 13 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -205,29 +206,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 441 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52104.783599 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52111.617312 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40054.669704 # average overall mshr miss latency system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 22874000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 22877000 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.995465 # miss rate for demand accesses system.cpu.l2cache.demand_misses 439 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 17574000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 17584000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.995465 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 439 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 441 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52104.783599 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52111.617312 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40054.669704 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 2 # number of overall hits -system.cpu.l2cache.overall_miss_latency 22874000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 22877000 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.995465 # miss rate for overall accesses system.cpu.l2cache.overall_misses 439 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 17574000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 17584000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.995465 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 439 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -235,16 +236,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 375 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 185.807591 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 187.032260 # Cycle average of tags in use system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 59882 # number of cpu cycles simulated +system.cpu.numCycles 58414 # number of cpu cycles simulated +system.cpu.runCycles 11845 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 59882 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 52540 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 5874 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 10.055809 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 52586 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 5828 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 9.977060 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 52582 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 5832 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 9.983908 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 56324 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 2090 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 3.577909 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 52587 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 5827 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 9.975348 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 58414 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ---------- |