diff options
Diffstat (limited to 'src/cpu/o3')
28 files changed, 817 insertions, 406 deletions
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 490305cbf..5e767655d 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -91,7 +91,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -99,7 +102,9 @@ Param<unsigned> renameToROBDelay; Param<unsigned> commitWidth; Param<unsigned> squashWidth; Param<Tick> trapLatency; -Param<Tick> fetchTrapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; Param<std::string> predType; Param<unsigned> localPredictorSize; @@ -207,7 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -216,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), - INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), @@ -333,7 +343,10 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -341,7 +354,9 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; params->trapLatency = trapLatency; - params->fetchTrapLatency = fetchTrapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; params->predType = predType; params->localPredictorSize = localPredictorSize; diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh index 8cd8692c6..b928ae654 100644 --- a/src/cpu/o3/alpha/impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -36,6 +36,7 @@ #include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template <class Impl> class AlphaDynInst; @@ -88,7 +89,4 @@ struct AlphaSimpleImpl /** The O3Impl to be used. */ typedef AlphaSimpleImpl O3CPUImpl; -/** The O3Impl to be used. */ -typedef DynInst O3DynInst; - #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh index b1f2a487d..c618cee08 100644 --- a/src/cpu/o3/alpha/params.hh +++ b/src/cpu/o3/alpha/params.hh @@ -54,16 +54,7 @@ class AlphaSimpleParams : public O3Params #if FULL_SYSTEM AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector<Process *> workload; - Process *process; -#endif // FULL_SYSTEM - - MemObject *mem; - - BaseCPU *checker; - - unsigned decodeToFetchDelay; +#endif }; #endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 78b0ee788..ad52b0d2e 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -70,18 +70,19 @@ class AlphaTC : public O3ThreadContext<Impl> { panic("Not supported on Alpha!"); } - // This function exits the thread context in the CPU and returns - // 1 if the CPU has no more active threads (meaning it's OK to exit); - // Used in syscall-emulation mode when a thread executes the 'exit' - // syscall. + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ virtual int exit() { - this->cpu->deallocateContext(this->thread->readTid()); + this->deallocate(); // If there are still threads executing in the system if (this->cpu->numActiveThreads()) - return 0; + return 0; // don't exit simulation else - return 1; + return 1; // exit simulation } }; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 60b555269..956b6ec3e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -162,10 +162,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -187,11 +183,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + bool drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -332,10 +331,6 @@ class DefaultCommit /** Vector of all of the threads. */ std::vector<Thread *> thread; - Fault fetchFault; - - int fetchTrapWait; - /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -383,8 +378,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. */ bool switchedOut; @@ -394,10 +389,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 06b8e8a95..c667d633a 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -80,10 +80,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameWidth(params->renameWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -123,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) tcSquash[i] = false; PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template <class Impl> @@ -235,7 +231,6 @@ DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr) cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template <class Impl> @@ -294,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template <class Impl> void -DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - -template <class Impl> -void DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) { iewStage = iew_stage; @@ -350,23 +338,38 @@ DefaultCommit<Impl>::initStage() } template <class Impl> -void -DefaultCommit<Impl>::switchOut() +bool +DefaultCommit<Impl>::drain() { - switchPending = true; + drainPending = true; + + // If it's already drained, return true. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + return true; + } + + return false; } template <class Impl> void -DefaultCommit<Impl>::doSwitchOut() +DefaultCommit<Impl>::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } template <class Impl> void +DefaultCommit<Impl>::resume() +{ + drainPending = false; +} + +template <class Impl> +void DefaultCommit<Impl>::takeOverFrom() { switchedOut = false; @@ -557,11 +560,15 @@ DefaultCommit<Impl>::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } + if ((*activeThreads).size() <= 0) + return; + list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -575,7 +582,7 @@ DefaultCommit<Impl>::tick() commitStatus[tid] = Running; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); rob->doSquash(tid); toIEW->commitInfo[tid].robSquashing = true; wroteToTimeBuffer = true; @@ -989,6 +996,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Check if the instruction caused a fault. If so, trap. Fault inst_fault = head_inst->getFault(); + // DTB will sometimes need the machine instruction for when + // faults happen. So we will set it here, prior to the DTB + // possibly needing it for its fault. + thread[tid]->setInst( + static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); + if (inst_fault != NoFault) { head_inst->setCompleted(); DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", @@ -1011,12 +1024,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // execution doesn't generate extra squashes. thread[tid]->inSyscall = true; - // DTB will sometimes need the machine instruction for when - // faults happen. So we will set it here, prior to the DTB - // possibly needing it for its fault. - thread[tid]->setInst( - static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); - // Execute the trap. Although it's slightly unrealistic in // terms of timing (as it doesn't wait for the full timing of // the trap event to complete before updating state), it's diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index feca4cdf2..c43cc2cf8 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -115,6 +115,36 @@ FullO3CPU<Impl>::ActivateThreadEvent::description() } template <class Impl> +FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); + cpu->removeThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::DeallocateContextEvent::description() +{ + return "FullO3CPU \"Deallocate Context\" event"; +} + +template <class Impl> FullO3CPU<Impl>::FullO3CPU(Params *params) : BaseO3CPU(params), tickEvent(this), @@ -141,15 +171,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters or derived - // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -158,7 +187,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { @@ -214,7 +243,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); @@ -361,6 +389,18 @@ FullO3CPU<Impl>::fullCPURegStats() } template <class Impl> +Port * +FullO3CPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return iew.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void FullO3CPU<Impl>::tick() { @@ -400,7 +440,8 @@ FullO3CPU<Impl>::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut) { + if (_status == SwitchedOut || + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -461,16 +502,107 @@ FullO3CPU<Impl>::init() template <class Impl> void +FullO3CPU<Impl>::activateThread(unsigned tid) +{ + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deactivateThread(unsigned tid) +{ + //Remove From Active List, if Active + list<unsigned>::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int tid, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleDeallocateContextEvent(tid, delay); + } else { + deactivateThread(tid); + removeThread(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int tid) +{ + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + deactivateThread(tid); + if (activeThreads.size() == 0) + unscheduleTickEvent(); + _status = Idle; +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int tid) +{ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + deallocateContext(tid, 1); +} + +template <class Impl> +void FullO3CPU<Impl>::insertThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. -#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -490,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -503,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template <class Impl> void FullO3CPU<Impl>::removeThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -520,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -528,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... - */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); + iew.squash(tid); + commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -604,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid) //blocks fetch contextSwitch = true; + //@todo: dont always add to waitlist //do waitlist cpuWaitList.push_back(tid); } @@ -611,149 +743,132 @@ FullO3CPU<Impl>::activateWhenReady(int tid) template <class Impl> void -FullO3CPU<Impl>::activateThread(unsigned int tid) -{ - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", - tid); - - activeThreads.push_back(tid); +FullO3CPU<Impl>::serialize(std::ostream &os) +{ + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); } } - template <class Impl> void -FullO3CPU<Impl>::activateContext(int tid, int delay) -{ - // Needs to set each stage to running as well. - if (delay){ - DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " - "on cycle %d\n", tid, curTick + cycles(delay)); - scheduleActivateThreadEvent(tid, delay); - } else { - activateThread(tid); - } - - if(lastActivatedCycle < curTick) { - scheduleTickEvent(delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); - - lastActivatedCycle = curTick; - - _status = Running; +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); } } template <class Impl> -void -FullO3CPU<Impl>::suspendContext(int tid) +unsigned int +FullO3CPU<Impl>::drain(Event *drain_event) { - DPRINTF(O3CPU,"[tid: %i]: Suspended ...\n", tid); - unscheduleTickEvent(); - _status = Idle; -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + drainCount = 0; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + commit.drain(); - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + // Wake the CPU and record activity so everything can drain out if + // the CPU was not able to immediately drain. + if (getState() != SimObject::Drained) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + + wakeCPU(); + activityRec.activity(); + + return 1; + } else { + return 0; } -*/ } template <class Impl> void -FullO3CPU<Impl>::deallocateContext(int tid) +FullO3CPU<Impl>::resume() { - DPRINTF(O3CPU,"[tid:%i]: Deallocating ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + assert(system->getMemoryMode() == System::Timing); + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Running); - removeThread(tid); - } -*/ + if (_status == SwitchedOut || _status == Idle) + return; + + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; } template <class Impl> void -FullO3CPU<Impl>::haltContext(int tid) +FullO3CPU<Impl>::signalDrained() { - DPRINTF(O3CPU,"[tid:%i]: Halted ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + if (++drainCount == NumStages) { + if (tickEvent.scheduled()) + tickEvent.squash(); - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Drained); - removeThread(tid); + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } -*/ + assert(drainCount <= 5); } template <class Impl> void -FullO3CPU<Impl>::switchOut(Sampler *_sampler) +FullO3CPU<Impl>::switchOut() { - sampler = _sampler; - switchCount = 0; fetch.switchOut(); - decode.switchOut(); rename.switchOut(); - iew.switchOut(); commit.switchOut(); + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); + } - // Wake the CPU and record activity so everything can drain out if - // the CPU is currently idle. - wakeCPU(); - activityRec.activity(); -} - -template <class Impl> -void -FullO3CPU<Impl>::signalSwitched() -{ - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } - + _status = SwitchedOut; #if USE_CHECKER - if (checker) - checker->switchOut(sampler); + if (checker) + checker->switchOut(); #endif - - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; - } - assert(switchCount <= 5); } template <class Impl> @@ -761,7 +876,7 @@ void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -932,7 +1047,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatReg(phys_reg, val); } @@ -941,7 +1057,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatReg(phys_reg, val, 64); } @@ -950,7 +1067,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegBits(phys_reg, val); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 1cff6142d..83cb966e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -57,6 +57,8 @@ class Checker; class ThreadContext; template <class> class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; @@ -197,6 +199,49 @@ class FullO3CPU : public BaseO3CPU /** The tick event used for scheduling CPU ticks. */ ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + class DeallocateContextEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + DeallocateContextEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule cpu to deallocate thread context.*/ + void scheduleDeallocateContextEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (deallocateContextEvent[tid].squashed()) + deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + else if (!deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule thread deallocation in CPU */ + void unscheduleDeallocateContextEvent(int tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -206,6 +251,9 @@ class FullO3CPU : public BaseO3CPU /** Registers statistics. */ void fullCPURegStats(); + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + /** Ticks CPU, calling tick() on each stage, and checking the overall * activity to see if the CPU should deschedule itself. */ @@ -219,7 +267,10 @@ class FullO3CPU : public BaseO3CPU { return activeThreads.size(); } /** Add Thread to Active Threads List */ - void activateThread(unsigned int tid); + void activateThread(unsigned tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(unsigned tid); /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -247,7 +298,7 @@ class FullO3CPU : public BaseO3CPU /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. */ - void deallocateContext(int tid); + void deallocateContext(int tid, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -263,6 +314,13 @@ class FullO3CPU : public BaseO3CPU /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + public: /** Executes a syscall on this cycle. * --------------------------------------- * Note: this is a virtual function. CPU-Specific @@ -270,14 +328,21 @@ class FullO3CPU : public BaseO3CPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual unsigned int drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -550,11 +615,11 @@ class FullO3CPU : public BaseO3CPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 1edf3335d..7f5ecbc26 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Drains the decode stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 16be01784..8b851c032 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) } template <class Impl> -void -DefaultDecode<Impl>::switchOut() +bool +DefaultDecode<Impl>::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. + cpu->signalDrained(); + return true; } template <class Impl> diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 000000000..a2cdf2dba --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "arch/isa_specific.hh" + +#if THE_ISA == ALPHA_ISA +template <class Impl> +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#endif + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 7fcd21b7d..931919af8 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -40,8 +40,6 @@ #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch @@ -164,6 +162,9 @@ class DefaultFetch /** Registers statistics. */ void regStats(); + /** Returns the icache port. */ + Port *getIcachePort() { return icachePort; } + /** Sets CPU pointer. */ void setCPU(O3CPU *cpu_ptr); @@ -182,11 +183,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -400,6 +404,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; @@ -423,6 +433,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 60eb76d17..4184e1867 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -109,6 +109,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) @@ -161,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -279,10 +282,6 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); - Port *mem_dport = mem->getPort(""); - icachePort->setPeer(mem_dport); - mem_dport->setPeer(icachePort); - #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); @@ -360,14 +359,19 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) return; } - // Wake up the CPU (if it went to sleep and was waiting on this completion - // event). - cpu->wakeCPU(); + memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - switchToActive(); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + } // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { @@ -383,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } template <class Impl> +bool +DefaultFetch<Impl>::drain() +{ + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; + return true; +} + +template <class Impl> void -DefaultFetch<Impl>::switchOut() +DefaultFetch<Impl>::resume() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + drainPending = false; } template <class Impl> void -DefaultFetch<Impl>::doSwitchOut() +DefaultFetch<Impl>::switchOut() { + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -498,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -509,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -540,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(cacheData[tid]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -898,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ////////////////////////////////////////// int tid = getFetchingThread(fetchPolicy); - if (tid == -1) { + if (tid == -1 || drainPending) { DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); // Breaks looping condition in tick() diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2af68d8fc..6c6be4787 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -31,11 +31,12 @@ #ifndef __CPU_O3_IEW_HH__ #define __CPU_O3_IEW_HH__ +#include "config/full_system.hh" + #include <queue> #include "base/statistics.hh" #include "base/timebuf.hh" -#include "config/full_system.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/lsq.hh" @@ -125,6 +126,9 @@ class DefaultIEW /** Initializes stage; sends back the number of free IQ and LSQ entries. */ void initStage(); + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + /** Sets CPU pointer for IEW, IQ, and LSQ. */ void setCPU(O3CPU *cpu_ptr); @@ -143,11 +147,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -204,6 +211,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#ifdef DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#ifdef DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#ifdef DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -384,11 +430,8 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; @@ -403,6 +446,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 8e6fd46a1..684ae2295 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -42,16 +42,17 @@ using namespace std; template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // @todo: Make this into a parameter. - issueToExecQueue(5, 5), + : issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -74,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -348,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr) } template <class Impl> +bool +DefaultIEW<Impl>::drain() +{ + // IEW is ready to drain at any time. + cpu->signalDrained(); + return true; +} + +template <class Impl> void -DefaultIEW<Impl>::switchOut() +DefaultIEW<Impl>::resume() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); } template <class Impl> void -DefaultIEW<Impl>::doSwitchOut() +DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; @@ -400,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -559,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -937,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1189,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1351,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index b99bd0900..36e0842be 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -687,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -784,6 +785,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 89791fec9..190734dc2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -65,6 +65,13 @@ class LSQ { /** Registers statistics of each LSQ unit. */ void regStats(); + /** Returns dcache port. + * @todo: Dcache port needs to be moved up to this level for SMT + * to work. For now it just returns the port from one of the + * threads. + */ + Port *getDcachePort() { return &dcachePort; } + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ @@ -251,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -267,7 +283,49 @@ class LSQ { template <class T> Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -296,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. */ + int retryTid; }; template <class Impl> diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 5173f8be1..4e3957029 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -36,9 +36,53 @@ using namespace std; template <class Impl> +Tick +LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template <class Impl> +bool +LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; +} + +template <class Impl> LSQ<Impl>::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); @@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -130,6 +175,8 @@ LSQ<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } @@ -502,6 +549,9 @@ LSQ<Impl>::hasStoresToWB() { list<unsigned>::iterator active_threads = (*activeThreads).begin(); + if ((*activeThreads).empty()) + return false; + while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; if (!hasStoresToWB(tid)) diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 74b8fe5bb..512b5a63c 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -64,6 +64,7 @@ class LSQUnit { typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,7 +72,7 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ @@ -87,6 +88,10 @@ class LSQUnit { void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. */ void switchOut(); @@ -206,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -216,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -239,54 +244,11 @@ class LSQUnit { /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; + /** Pointer to the LSQ. */ + LSQ *lsq; - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - O3CPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(O3CPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); - - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. */ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -639,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + iewStage->decrWb(load_inst->seqNum); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not @@ -653,7 +616,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -661,9 +624,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -673,8 +633,18 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 714acb2ef..4f5dbbf1c 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -31,6 +31,7 @@ #include "config/use_checker.hh" +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" #include "mem/packet.hh" @@ -77,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; @@ -95,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } template <class Impl> -Tick -LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template <class Impl> -bool -LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - -template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), @@ -144,13 +106,15 @@ LSQUnit<Impl>::LSQUnit() template<class Impl> void -LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, +LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). @@ -167,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -179,11 +141,6 @@ void LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); #if USE_CHECKER if (cpu->checker) { @@ -591,7 +548,7 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -833,6 +790,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); assert(!inst->isStore()); ++lsqIgnoredResponses; return; @@ -914,6 +872,7 @@ LSQUnit<Impl>::recvRetry() } else { // Still blocked! ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh index 69a1bb937..1c234bcd7 100755 --- a/src/cpu/o3/params.hh +++ b/src/cpu/o3/params.hh @@ -47,6 +47,18 @@ class O3Params : public BaseO3CPU::Params unsigned activity; // + // Pointers to key objects + // +#if !FULL_SYSTEM + std::vector<Process *> workload; + Process *process; +#endif // FULL_SYSTEM + + MemObject *mem; + + BaseCPU *checker; + + // // Caches // // MemInterface *icacheInterface; @@ -86,7 +98,10 @@ class O3Params : public BaseO3CPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -100,6 +115,12 @@ class O3Params : public BaseO3CPU::Params Tick fetchTrapLatency; // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + + // // Branch predictor (BP, BTB, RAS) // std::string predType; diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 6972f055f..b6677b4b1 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -86,10 +86,6 @@ class PhysRegFile //The duplication is unfortunate but it's better than having //different ways to access certain registers. - //Add these in later when everything else is in place -// void serialize(std::ostream &os); -// void unserialize(Checkpoint *cp, const std::string §ion); - /** Reads an integer register. */ uint64_t readIntReg(PhysRegIndex reg_idx) { diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 581fc8f81..034087feb 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df8b7f9da..805a72808 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard) } template <class Impl> -void -DefaultRename<Impl>::switchOut() +bool +DefaultRename<Impl>::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); + return true; } template <class Impl> void -DefaultRename<Impl>::doSwitchOut() +DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6f8080ef4..7cd5a5143 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -308,7 +308,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. */ bool doneSquashing[Impl::MaxThreads]; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index d9978b17f..1b9f666b8 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -41,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -352,11 +352,11 @@ void ROB<Impl>::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -371,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -408,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid) // Check if ROB is done squashing. - if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -520,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); @@ -544,6 +544,7 @@ ROB<Impl>::readHeadInst() } } */ + template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst(unsigned tid) @@ -558,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid) return dummyInst; } } + /* template <class Impl> uint64_t diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index d097ee63e..df8d1a6d8 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -112,7 +112,7 @@ class O3ThreadContext : public ThreadContext virtual void suspend(); /** Set the status to Unallocated. */ - virtual void deallocate(); + virtual void deallocate(int delay = 0); /** Set the status to Halted. */ virtual void halt(); diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index cfb71f623..bf8cbf850 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -115,7 +115,8 @@ template <class Impl> void O3ThreadContext<Impl>::activate(int delay) { - DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling activate on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Active) return; @@ -139,7 +140,8 @@ template <class Impl> void O3ThreadContext<Impl>::suspend() { - DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); + DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Suspended) return; @@ -163,22 +165,24 @@ O3ThreadContext<Impl>::suspend() template <class Impl> void -O3ThreadContext<Impl>::deallocate() +O3ThreadContext<Impl>::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); + cpu->deallocateContext(thread->readTid(), delay); } template <class Impl> void O3ThreadContext<Impl>::halt() { - DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); + DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Halted) return; |