diff options
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/alpha/cpu_builder.cc | 11 | ||||
-rw-r--r-- | src/cpu/o3/checker_builder.cc | 13 | ||||
-rw-r--r-- | src/cpu/o3/commit_impl.hh | 19 | ||||
-rw-r--r-- | src/cpu/o3/cpu.cc | 43 | ||||
-rw-r--r-- | src/cpu/o3/fetch_impl.hh | 9 | ||||
-rw-r--r-- | src/cpu/o3/iew.hh | 8 | ||||
-rw-r--r-- | src/cpu/o3/iew_impl.hh | 45 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue.hh | 4 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue_impl.hh | 28 | ||||
-rw-r--r-- | src/cpu/o3/lsq_impl.hh | 10 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit.hh | 11 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 14 | ||||
-rw-r--r-- | src/cpu/o3/mem_dep_unit_impl.hh | 3 | ||||
-rw-r--r-- | src/cpu/o3/rename.hh | 2 | ||||
-rw-r--r-- | src/cpu/o3/rename_impl.hh | 13 | ||||
-rw-r--r-- | src/cpu/o3/thread_state.hh | 29 | ||||
-rw-r--r-- | src/cpu/o3/tournament_pred.cc | 10 | ||||
-rw-r--r-- | src/cpu/o3/tournament_pred.hh | 3 |
18 files changed, 233 insertions, 42 deletions
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 5e767655d..fbf1f342c 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -56,6 +56,7 @@ SimObjectParam<System *> system; Param<int> cpu_id; SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; +Param<Tick> profile; #else SimObjectVectorParam<Process *> workload; #endif // FULL_SYSTEM @@ -68,6 +69,8 @@ Param<Counter> max_insts_any_thread; Param<Counter> max_insts_all_threads; Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; +Param<Counter> stats_reset_inst; +Param<Tick> progress_interval; Param<unsigned> cachePorts; @@ -162,6 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(itb, "Instruction translation buffer"), INIT_PARAM(dtb, "Data translation buffer"), + INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "Processes to run"), #endif // FULL_SYSTEM @@ -184,6 +188,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) "Terminate when all threads have reached this load" "count", 0), + INIT_PARAM_DFLT(stats_reset_inst, + "blah", + 0), + INIT_PARAM_DFLT(progress_interval, "Progress interval", 0), INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), @@ -305,6 +313,7 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->cpu_id = cpu_id; params->itb = itb; params->dtb = dtb; + params->profile = profile; #else params->workload = workload; #endif // FULL_SYSTEM @@ -317,6 +326,8 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; params->max_loads_all_threads = max_loads_all_threads; + params->stats_reset_inst = stats_reset_inst; + params->progress_interval = progress_interval; // // Caches diff --git a/src/cpu/o3/checker_builder.cc b/src/cpu/o3/checker_builder.cc index 782d963b0..ad83ec57a 100644 --- a/src/cpu/o3/checker_builder.cc +++ b/src/cpu/o3/checker_builder.cc @@ -64,6 +64,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) Param<Counter> max_insts_all_threads; Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; + Param<Counter> stats_reset_inst; + Param<Tick> progress_interval; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; @@ -78,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) Param<bool> defer_registration; Param<bool> exitOnError; + Param<bool> updateOnError; Param<bool> warnOnlyOnLoadError; Param<bool> function_trace; Param<Tick> function_trace_start; @@ -94,6 +97,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) "terminate when any thread reaches this load count"), INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), + INIT_PARAM(stats_reset_inst, + "blah"), + INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), @@ -109,6 +115,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"), INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), @@ -126,7 +133,9 @@ CREATE_SIM_OBJECT(O3Checker) params->max_insts_all_threads = 0; params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; + params->stats_reset_inst = 0; params->exitOnError = exitOnError; + params->updateOnError = updateOnError; params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; @@ -139,6 +148,10 @@ CREATE_SIM_OBJECT(O3Checker) temp = max_insts_all_threads; temp = max_loads_any_thread; temp = max_loads_all_threads; + temp = stats_reset_inst; + Tick temp2 = progress_interval; + params->progress_interval = 0; + temp2++; #if FULL_SYSTEM params->itb = itb; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 34f487e2c..6ae01ae67 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Generate trap squash event. generateTrapEvent(tid); - +// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); return false; } updateComInstStats(head_inst); +#if FULL_SYSTEM + if (thread[tid]->profile) { +// bool usermode = +// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0; +// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC(); + thread[tid]->profilePC = head_inst->readPC(); + ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(), + head_inst->staticInst); + + if (node) + thread[tid]->profileNode = node; + } +#endif + if (head_inst->traceData) { head_inst->traceData->setFetchSeq(head_inst->seqNum); head_inst->traceData->setCPSeq(thread[tid]->numInst); @@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->renamedDestRegIdx(i)); } + if (head_inst->isCopy()) + panic("Should not commit any copy instructions!"); + // Finally clear the head ROB entry. rob->retireHead(tid); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 19ab7f4c5..4279df6f7 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -33,6 +33,7 @@ #include "config/use_checker.hh" #if FULL_SYSTEM +#include "cpu/quiesce_event.hh" #include "sim/system.hh" #else #include "sim/process.hh" @@ -793,6 +794,8 @@ template <class Impl> unsigned int FullO3CPU<Impl>::drain(Event *drain_event) { + DPRINTF(O3CPU, "Switching out\n"); + BaseCPU::switchOut(_sampler); drainCount = 0; fetch.drain(); decode.drain(); @@ -863,6 +866,7 @@ FullO3CPU<Impl>::switchOut() { fetch.switchOut(); rename.switchOut(); + iew.switchOut(); commit.switchOut(); instList.clear(); while (!removeList.empty()) { @@ -931,6 +935,45 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) } template <class Impl> +void +FullO3CPU<Impl>::serialize(std::ostream &os) +{ + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static CPUExecContext temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyXC(thread[i]->getXCProxy()); + temp.serialize(os); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static CPUExecContext temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyXC(thread[i]->getXCProxy()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getXCProxy()->copyArchRegs(temp.getProxy()); + } +} + +template <class Impl> uint64_t FullO3CPU<Impl>::readIntReg(int reg_idx) { diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 1e080181c..2d447bfe5 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom() wroteToTimeBuffer = false; _status = Inactive; switchedOut = false; + interruptPending = false; branchPred.takeOverFrom(); } @@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0)) { + if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("cycle %lli: Quiesce instruction encountered, halting fetch!", - curTick); +// warn("%lli: Quiesce instruction encountered, halting fetch!", +// curTick); fetchStatus[tid] = QuiescePending; ++numInst; status_change = true; @@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); +// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); #endif // FULL_SYSTEM diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 76fa008ee..a400c9fa8 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -216,6 +216,7 @@ class DefaultIEW if (++wbOutstanding == wbMax) ableToIssue = false; DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding <= wbMax); #ifdef DEBUG wbList.insert(sn); #endif @@ -226,6 +227,7 @@ class DefaultIEW if (wbOutstanding-- == wbMax) ableToIssue = true; DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding >= 0); #ifdef DEBUG assert(wbList.find(sn) != wbList.end()); wbList.erase(sn); @@ -450,7 +452,9 @@ class DefaultIEW unsigned wbCycle; /** Number of instructions in flight that will writeback. */ - unsigned wbOutstanding; + + /** Number of instructions in flight that will writeback. */ + int wbOutstanding; /** Writeback width. */ unsigned wbWidth; @@ -507,6 +511,8 @@ class DefaultIEW Stats::Scalar<> iewExecutedInsts; /** Stat for total number of executed load instructions. */ Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of executed store instructions. */ +// Stats::Scalar<> iewExecStoreInsts; /** Stat for total number of squashed instructions skipped at execute. */ Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index e9b24a6d4..c82f6dd21 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; iewExecutedInsts - .name(name() + ".EXEC:insts") + .name(name() + ".iewExecutedInsts") .desc("Number of executed instructions"); iewExecLoadInsts .init(cpu->number_of_threads) - .name(name() + ".EXEC:loads") + .name(name() + ".iewExecLoadInsts") .desc("Number of load instructions executed") .flags(total); iewExecSquashedInsts - .name(name() + ".EXEC:squashedInsts") + .name(name() + ".iewExecSquashedInsts") .desc("Number of squashed instructions skipped in execute"); iewExecutedSwp @@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; + assert(insts[0].empty()); + assert(skidBuffer[0].empty()); instQueue.switchOut(); ldstQueue.switchOut(); @@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; - // @todo: Fix hardcoded number for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } @@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) wbNumInst = 0; } - assert((wbCycle * wbWidth + wbNumInst) < wbMax); + assert((wbCycle * wbWidth + wbNumInst) <= wbMax); } + DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n", + wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst); // Add finished instruction to queue to commit. (*iewQueue)[wbCycle].insts[wbNumInst] = inst; (*iewQueue)[wbCycle].size++; @@ -903,6 +906,22 @@ DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) template <class Impl> void +DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + +template <class Impl> +void DefaultIEW<Impl>::wakeCPU() { cpu->wakeCPU(); @@ -1273,13 +1292,23 @@ DefaultIEW<Impl>::executeInsts() // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); } else if (inst->isStore()) { - ldstQueue.executeStore(inst); + fault = ldstQueue.executeStore(inst); // If the store had a fault then it may not have a mem req - if (inst->req && !(inst->req->getFlags() & LOCKED)) { + if (!inst->isStoreConditional() && fault == NoFault) { + inst->setExecuted(); + + instToCommit(inst); + } else if (fault != NoFault) { + // If the instruction faulted, then we need to send it along to commit + // without the instruction completing. + + // Send this instruction to commit, also make sure iew stage + // realizes there is activity. inst->setExecuted(); instToCommit(inst); + activityThisCycle(); } // Store conditionals will mark themselves as @@ -1404,7 +1433,7 @@ DefaultIEW<Impl>::writebackInsts() // E.g. Uncached loads have not actually executed when they // are first sent to commit. Instead commit must tell the LSQ // when it's ready to execute the uncached load. - if (!inst->isSquashed() && inst->isExecuted()) { + if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) { int dependents = instQueue.wakeDependents(inst); for (int i = 0; i < inst->numDestRegs(); i++) { diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index d745faf7b..3dd4dc658 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -479,13 +479,13 @@ class InstructionQueue /** Distribution of number of instructions in the queue. * @todo: Need to create struct to track the entry time for each * instruction. */ - Stats::VectorDistribution<> queueResDist; +// Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; /** Distribution of the cycles it takes to issue an instruction. * @todo: Need to create struct to track the ready time for each * instruction. */ - Stats::VectorDistribution<> issueDelayDist; +// Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a * FU was busy. diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 47634f645..6edb528a9 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats() .name(name() + ".iqSquashedNonSpecRemoved") .desc("Number of squashed non-spec instructions that were removed") .prereq(iqSquashedNonSpecRemoved); - +/* queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") @@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats() for (int i = 0; i < Num_OpClasses; ++i) { queueResDist.subname(i, opClassStrings[i]); } +*/ numIssuedDist .init(0,totalWidth,1) .name(name() + ".ISSUE:issued_per_cycle") @@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats() // // How long did instructions for a particular FU type wait prior to issue // - +/* issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") @@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats() subname << opClassStrings[i] << "_delay"; issueDelayDist.subname(i, subname.str()); } - +*/ issueRate .name(name() + ".ISSUE:rate") .desc("Inst issue rate") @@ -385,8 +386,16 @@ template <class Impl> void InstructionQueue<Impl>::switchOut() { +/* + if (!instList[0].empty() || (numEntries != freeEntries) || + !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) { + dumpInsts(); +// assert(0); + } +*/ resetState(); dependGraph.reset(); + instsToExecute.clear(); switchedOut = true; for (int i = 0; i < numThreads; ++i) { memDepUnit[i].switchOut(); @@ -642,9 +651,12 @@ template <class Impl> void InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx) { + DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum); // The CPU could have been sleeping until this op completed (*extremely* // long latency op). Wake it if it was. This may be overkill. if (isSwitchedOut()) { + DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n", + inst->seqNum); return; } @@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid) (squashed_inst->isMemRef() && !squashed_inst->memOpDone)) { + DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " + "squashed.\n", + tid, squashed_inst->seqNum, squashed_inst->readPC()); + // Remove the instruction from the dependency list. if (!squashed_inst->isNonSpeculative() && !squashed_inst->isStoreConditional() && @@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid) ++iqSquashedOperandsExamined; } - } else { + } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); assert(ns_inst_it != nonSpecInsts.end()); @@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid) count[squashed_inst->threadNumber]--; ++freeEntries; - - DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " - "squashed.\n", - tid, squashed_inst->seqNum, squashed_inst->readPC()); } instList[tid].erase(squash_it--); diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 2bbab71f0..a1ac5adb8 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -167,6 +167,16 @@ LSQ<Impl>::regStats() template<class Impl> void +LSQ<Impl>::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template<class Impl> +void LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { activeThreads = at_ptr; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 1358a3699..8537e9dd7 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -407,20 +407,9 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. - /** Total number of loads forwaded from LSQ stores. */ Stats::Scalar<> lsqForwLoads; - /** Total number of loads ignored due to invalid addresses. */ - Stats::Scalar<> invAddrLoads; - - /** Total number of squashed loads. */ - Stats::Scalar<> lsqSquashedLoads; - - /** Total number of responses from the memory system that are - * ignored due to the instruction already being squashed. */ - Stats::Scalar<> lsqIgnoredResponses; - /** Total number of squashed stores. */ Stats::Scalar<> lsqSquashedStores; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index fa716c712..2922b81bd 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats() .name(name() + ".ignoredResponses") .desc("Number of memory responses ignored because the instruction is squashed"); + lsqMemOrderViolation + .name(name() + ".memOrderViolation") + .desc("Number of memory ordering violations"); + lsqSquashedStores .name(name() + ".squashedStores") .desc("Number of stores squashed"); @@ -220,8 +224,10 @@ void LSQUnit<Impl>::switchOut() { switchedOut = true; - for (int i = 0; i < loadQueue.size(); ++i) + for (int i = 0; i < loadQueue.size(); ++i) { + assert(!loadQueue[i]); loadQueue[i] = NULL; + } assert(storesToWB == 0); } @@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) if (load_fault != NoFault) { // Send this instruction to commit, also make sure iew stage // realizes there is activity. + // Mark it as executed unless it is an uncached load that + // needs to hit the head of commit. + if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) { + inst->setExecuted(); + } iewStage->instToCommit(inst); iewStage->activityThisCycle(); } @@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) // A load incorrectly passed this store. Squash and refetch. // For now return a fault to show that it was unsuccessful. memDepViolator = loadQueue[load_idx]; + ++lsqMemOrderViolation; return genMachineCheckFault(); } diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index 16f67a4e0..c649ca385 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -109,6 +109,9 @@ template <class MemDepPred, class Impl> void MemDepUnit<MemDepPred, Impl>::switchOut() { + assert(instList[0].empty()); + assert(instsToReplay.empty()); + assert(memDepHash.empty()); // Clear any state. for (int i = 0; i < Impl::MaxThreads; ++i) { instList[i].clear(); diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index ba26a01dd..177b9cb87 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -417,6 +417,8 @@ class DefaultRename /** The maximum skid buffer size. */ unsigned skidBufferMax; + PhysRegIndex maxPhysicalRegs; + /** Enum to record the source of a structure full stall. Can come from * either ROB, IQ, LSQ, and it is priortized in that order. */ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 782c0fe5f..248d7deb6 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params) commitToRenameDelay(params->commitToRenameDelay), renameWidth(params->renameWidth), commitWidth(params->commitWidth), - numThreads(params->numberOfThreads) + numThreads(params->numberOfThreads), + maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs) { _status = Inactive; @@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut() // Put the renamed physical register back on the free list. freeList->addReg(hb_it->newPhysReg); + // Be sure to mark its register as ready if it's a misc register. + if (hb_it->newPhysReg >= maxPhysicalRegs) { + scoreboard->setReg(hb_it->newPhysReg); + } + historyBuffer[i].erase(hb_it++); } insts[i].clear(); @@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid) // Put the renamed physical register back on the free list. freeList->addReg(hb_it->newPhysReg); + // Be sure to mark its register as ready if it's a misc register. + if (hb_it->newPhysReg >= maxPhysicalRegs) { + scoreboard->setReg(hb_it->newPhysReg); + } + historyBuffer[tid].erase(hb_it++); ++renameUndoneMaps; diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index b6f2e14c0..0247deb52 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -31,8 +31,11 @@ #ifndef __CPU_O3_THREAD_STATE_HH__ #define __CPU_O3_THREAD_STATE_HH__ +#include "base/callback.hh" +#include "base/output.hh" #include "cpu/thread_context.hh" #include "cpu/thread_state.hh" +#include "sim/sim_exit.hh" class Event; class Process; @@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState { #if FULL_SYSTEM O3ThreadState(O3CPU *_cpu, int _thread_num) : ThreadState(-1, _thread_num), - inSyscall(0), trapPending(0) - { } + cpu(_cpu), inSyscall(0), trapPending(0) + { + if (cpu->params->profile) { + profile = new FunctionProfile(cpu->params->system->kernelSymtab); + Callback *cb = + new MakeCallback<O3ThreadState, + &O3ThreadState::dumpFuncProfile>(this); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. + static ProfileNode dummyNode; + profileNode = &dummyNode; + profilePC = 3; + } #else O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *mem) @@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState { /** Handles the syscall. */ void syscall(int64_t callnum) { process->syscall(callnum, tc); } #endif + +#if FULL_SYSTEM + void dumpFuncProfile() + { + std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name())); + profile->dump(xcProxy, *os); + } +#endif }; #endif // __CPU_O3_THREAD_STATE_HH__ diff --git a/src/cpu/o3/tournament_pred.cc b/src/cpu/o3/tournament_pred.cc index 7cf78dcb1..ffb941c77 100644 --- a/src/cpu/o3/tournament_pred.cc +++ b/src/cpu/o3/tournament_pred.cc @@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, for (int i = 0; i < localPredictorSize; ++i) localCtrs[i].setBits(localCtrBits); + localPredictorMask = floorPow2(localPredictorSize) - 1; + if (!isPowerOf2(localHistoryTableSize)) { fatal("Invalid local history table size!\n"); } @@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history) //Lookup in the local predictor to get its branch prediction local_history_idx = calcLocHistIdx(branch_addr); local_predictor_idx = localHistoryTable[local_history_idx] - & localHistoryMask; + & localPredictorMask; local_prediction = localCtrs[local_predictor_idx].read() > threshold; //Lookup in the global predictor to get its branch prediction @@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history) bp_history = (void *)history; assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); + local_history_idx < localHistoryTableSize && + local_predictor_idx < localPredictorSize); // Commented code is for doing speculative update of counters and // all histories. @@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history) // Get the local predictor's current prediction local_history_idx = calcLocHistIdx(branch_addr); local_predictor_hist = localHistoryTable[local_history_idx]; - local_predictor_idx = local_predictor_hist & localHistoryMask; + local_predictor_idx = local_predictor_hist & localPredictorMask; // Update the choice predictor to tell it which one was correct if // there was a prediction. @@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history) } assert(globalHistory < globalPredictorSize && + local_history_idx < localHistoryTableSize && local_predictor_idx < localPredictorSize); // Update the counters and local history with the proper diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh index 66b4aaae2..472944910 100644 --- a/src/cpu/o3/tournament_pred.hh +++ b/src/cpu/o3/tournament_pred.hh @@ -159,6 +159,9 @@ class TournamentBP /** Size of the local predictor. */ unsigned localPredictorSize; + /** Mask to get the proper index bits into the predictor. */ + unsigned localPredictorMask; + /** Number of bits of the local predictor's counters. */ unsigned localCtrBits; |