From 51f19f2e28a30054d4a9cc06b059b602e17e504f Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 28 Sep 2006 00:09:27 -0400 Subject: Minor changes plus updates to O3. cpu/base.cc: Have output message regardless of build. cpu/checker/cpu_builder.cc: cpu/checker/o3_cpu_builder.cc: Be sure to include all parameters. cpu/o3/cpu.cc: IEW also needs to switch out. cpu/o3/iew_impl.hh: Handle stores with faults properly. cpu/o3/inst_queue_impl.hh: Switch out properly, handle squashing properly. cpu/o3/lsq_unit_impl.hh: Minor fixes. cpu/o3/mem_dep_unit_impl.hh: Make sure mem dep unit is switched out properly. cpu/o3/rename_impl.hh: Switch out fix. --HG-- extra : convert_revision : b94deb83f724225c01166c84a1b3fdd3543cbe9a --- cpu/base.cc | 8 ++++-- cpu/checker/cpu_builder.cc | 1 + cpu/checker/o3_cpu_builder.cc | 6 +++++ cpu/o3/cpu.cc | 1 + cpu/o3/iew_impl.hh | 18 ++++++++++--- cpu/o3/inst_queue_impl.hh | 21 +++++++++++---- cpu/o3/lsq_unit_impl.hh | 61 ++++++------------------------------------- cpu/o3/mem_dep_unit_impl.hh | 3 +++ cpu/o3/rename_impl.hh | 5 ++++ 9 files changed, 61 insertions(+), 63 deletions(-) diff --git a/cpu/base.cc b/cpu/base.cc index 044fafca9..d4ba8c812 100644 --- a/cpu/base.cc +++ b/cpu/base.cc @@ -60,15 +60,19 @@ int maxThreadsPerCPU = 1; void CPUProgressEvent::process() { -#ifndef NDEBUG Counter temp = cpu->totalInstructions(); +#ifndef NDEBUG double ipc = double(temp - lastNumInst) / (interval / cpu->cycles(1)); + DPRINTFN("%s progress event, instructions committed: %lli, IPC: %0.8d\n", cpu->name(), temp - lastNumInst, ipc); ipc = 0.0; +#else + cprintf("%lli: %s progress event, instructions committed: %lli\n", + curTick, cpu->name(), temp - lastNumInst); +#endif lastNumInst = temp; schedule(curTick + interval); -#endif } const char * diff --git a/cpu/checker/cpu_builder.cc b/cpu/checker/cpu_builder.cc index ec36ae09f..d68dcdcd9 100644 --- a/cpu/checker/cpu_builder.cc +++ b/cpu/checker/cpu_builder.cc @@ -144,6 +144,7 @@ CREATE_SIM_OBJECT(OzoneChecker) temp = max_insts_all_threads; temp = max_loads_any_thread; temp = max_loads_all_threads; + temp = stats_reset_inst; Tick temp2 = progress_interval; temp2++; params->progress_interval = 0; diff --git a/cpu/checker/o3_cpu_builder.cc b/cpu/checker/o3_cpu_builder.cc index 496cca779..079217b0c 100644 --- a/cpu/checker/o3_cpu_builder.cc +++ b/cpu/checker/o3_cpu_builder.cc @@ -58,6 +58,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) Param max_insts_all_threads; Param max_loads_any_thread; Param max_loads_all_threads; + Param stats_reset_inst; Param progress_interval; #if FULL_SYSTEM @@ -92,6 +93,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) "terminate when any thread reaches this load count"), INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), + INIT_PARAM(stats_reset_inst, + "blah"), INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0), #if FULL_SYSTEM @@ -127,6 +130,7 @@ CREATE_SIM_OBJECT(O3Checker) params->max_insts_all_threads = 0; params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; + params->stats_reset_inst = 0; params->exitOnError = exitOnError; params->updateOnError = updateOnError; params->deferRegistration = defer_registration; @@ -140,7 +144,9 @@ CREATE_SIM_OBJECT(O3Checker) temp = max_insts_all_threads; temp = max_loads_any_thread; temp = max_loads_all_threads; + temp = stats_reset_inst; Tick temp2 = progress_interval; + params->progress_interval = 0; temp2++; BaseMem *cache = icache; cache = dcache; diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 88de6c746..21cd1c599 100644 --- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -697,6 +697,7 @@ FullO3CPU::signalSwitched() if (++switchCount == NumStages) { fetch.doSwitchOut(); rename.doSwitchOut(); + iew.doSwitchOut(); commit.doSwitchOut(); instList.clear(); diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index 102be4f8d..33fd0f6b9 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -431,6 +431,8 @@ DefaultIEW::doSwitchOut() { // Clear any state. switchedOut = true; + assert(insts[0].empty()); + assert(skidBuffer[0].empty()); instQueue.switchOut(); ldstQueue.switchOut(); @@ -1281,13 +1283,23 @@ DefaultIEW::executeInsts() // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); } else if (inst->isStore()) { - ldstQueue.executeStore(inst); + fault = ldstQueue.executeStore(inst); // If the store had a fault then it may not have a mem req - if (inst->req && !(inst->req->flags & LOCKED)) { + if (!inst->isStoreConditional() && fault == NoFault) { inst->setExecuted(); instToCommit(inst); + } else if (fault != NoFault) { + // If the instruction faulted, then we need to send it along to commit + // without the instruction completing. + + // Send this instruction to commit, also make sure iew stage + // realizes there is activity. + inst->setExecuted(); + + instToCommit(inst); + activityThisCycle(); } // Store conditionals will mark themselves as @@ -1408,7 +1420,7 @@ DefaultIEW::writebackInsts() // E.g. Uncached loads have not actually executed when they // are first sent to commit. Instead commit must tell the LSQ // when it's ready to execute the uncached load. - if (!inst->isSquashed() && inst->isExecuted()) { + if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) { int dependents = instQueue.wakeDependents(inst); for (int i = 0; i < inst->numDestRegs(); i++) { diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index b6b06ca77..0a17cae5c 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -386,8 +386,16 @@ template void InstructionQueue::switchOut() { +/* + if (!instList[0].empty() || (numEntries != freeEntries) || + !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) { + dumpInsts(); +// assert(0); + } +*/ resetState(); dependGraph.reset(); + instsToExecute.clear(); switchedOut = true; for (int i = 0; i < numThreads; ++i) { memDepUnit[i].switchOut(); @@ -643,9 +651,12 @@ template void InstructionQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) { + DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum); // The CPU could have been sleeping until this op completed (*extremely* // long latency op). Wake it if it was. This may be overkill. if (isSwitchedOut()) { + DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n", + inst->seqNum); return; } @@ -1033,6 +1044,10 @@ InstructionQueue::doSquash(unsigned tid) (squashed_inst->isMemRef() && !squashed_inst->memOpDone)) { + DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " + "squashed.\n", + tid, squashed_inst->seqNum, squashed_inst->readPC()); + // Remove the instruction from the dependency list. if (!squashed_inst->isNonSpeculative() && !squashed_inst->isStoreConditional() && @@ -1063,7 +1078,7 @@ InstructionQueue::doSquash(unsigned tid) ++iqSquashedOperandsExamined; } - } else { + } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); assert(ns_inst_it != nonSpecInsts.end()); @@ -1090,10 +1105,6 @@ InstructionQueue::doSquash(unsigned tid) count[squashed_inst->threadNumber]--; ++freeEntries; - - DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " - "squashed.\n", - tid, squashed_inst->seqNum, squashed_inst->readPC()); } instList[tid].erase(squash_it--); diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 7086c381e..f75a41cfe 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -198,62 +198,12 @@ void LSQUnit::switchOut() { switchedOut = true; - for (int i = 0; i < loadQueue.size(); ++i) + for (int i = 0; i < loadQueue.size(); ++i) { + assert(!loadQueue[i]); loadQueue[i] = NULL; + } assert(storesToWB == 0); - - while (storesToWB > 0 && - storeWBIdx != storeTail && - storeQueue[storeWBIdx].inst && - storeQueue[storeWBIdx].canWB) { - - if (storeQueue[storeWBIdx].size == 0 || - storeQueue[storeWBIdx].inst->isDataPrefetch() || - storeQueue[storeWBIdx].committed || - storeQueue[storeWBIdx].req->flags & LOCKED) { - incrStIdx(storeWBIdx); - - continue; - } - - assert(storeQueue[storeWBIdx].req); - assert(!storeQueue[storeWBIdx].committed); - - MemReqPtr req = storeQueue[storeWBIdx].req; - storeQueue[storeWBIdx].committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); - - DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), - req->paddr, *(req->data), - storeQueue[storeWBIdx].inst->seqNum); - - switch(storeQueue[storeWBIdx].size) { - case 1: - cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data); - break; - case 2: - cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data); - break; - case 4: - cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data); - break; - case 8: - cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data); - break; - default: - panic("Unexpected store size!\n"); - } - incrStIdx(storeWBIdx); - } } template @@ -439,6 +389,11 @@ LSQUnit::executeLoad(DynInstPtr &inst) if (load_fault != NoFault) { // Send this instruction to commit, also make sure iew stage // realizes there is activity. + // Mark it as executed unless it is an uncached load that + // needs to hit the head of commit. + if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) { + inst->setExecuted(); + } iewStage->instToCommit(inst); iewStage->activityThisCycle(); } diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh index bfe694bd8..a2d04ece9 100644 --- a/cpu/o3/mem_dep_unit_impl.hh +++ b/cpu/o3/mem_dep_unit_impl.hh @@ -107,6 +107,9 @@ template void MemDepUnit::switchOut() { + assert(instList[0].empty()); + assert(instsToReplay.empty()); + assert(memDepHash.empty()); // Clear any state. for (int i = 0; i < Impl::MaxThreads; ++i) { instList[i].clear(); diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index 49627e3d4..a41e8d016 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -864,6 +864,11 @@ DefaultRename::doSquash(unsigned tid) // Put the renamed physical register back on the free list. freeList->addReg(hb_it->newPhysReg); + // Be sure to mark its register as ready if it's a misc register. + if (hb_it->newPhysReg >= maxPhysicalRegs) { + scoreboard->setReg(hb_it->newPhysReg); + } + historyBuffer[tid].erase(hb_it++); ++renameUndoneMaps; -- cgit v1.2.3