diff options
Diffstat (limited to 'cpu/o3')
-rw-r--r-- | cpu/o3/alpha_cpu_builder.cc | 33 | ||||
-rw-r--r-- | cpu/o3/alpha_cpu_impl.hh | 2 | ||||
-rw-r--r-- | cpu/o3/alpha_params.hh | 14 | ||||
-rw-r--r-- | cpu/o3/commit.hh | 13 | ||||
-rw-r--r-- | cpu/o3/commit_impl.hh | 62 | ||||
-rw-r--r-- | cpu/o3/cpu.cc | 66 | ||||
-rw-r--r-- | cpu/o3/cpu.hh | 6 | ||||
-rw-r--r-- | cpu/o3/decode_impl.hh | 9 | ||||
-rw-r--r-- | cpu/o3/fetch.hh | 6 | ||||
-rw-r--r-- | cpu/o3/fetch_impl.hh | 19 | ||||
-rw-r--r-- | cpu/o3/iew.hh | 107 | ||||
-rw-r--r-- | cpu/o3/iew_impl.hh | 157 | ||||
-rw-r--r-- | cpu/o3/inst_queue.hh | 2 | ||||
-rw-r--r-- | cpu/o3/inst_queue_impl.hh | 23 | ||||
-rw-r--r-- | cpu/o3/lsq.hh | 3 | ||||
-rw-r--r-- | cpu/o3/lsq_impl.hh | 10 | ||||
-rw-r--r-- | cpu/o3/lsq_unit.hh | 53 | ||||
-rw-r--r-- | cpu/o3/lsq_unit_impl.hh | 47 | ||||
-rw-r--r-- | cpu/o3/mem_dep_unit.cc | 2 | ||||
-rw-r--r-- | cpu/o3/mem_dep_unit_impl.hh | 14 | ||||
-rw-r--r-- | cpu/o3/regfile.hh | 16 | ||||
-rw-r--r-- | cpu/o3/rename_impl.hh | 4 | ||||
-rw-r--r-- | cpu/o3/rob.hh | 2 | ||||
-rw-r--r-- | cpu/o3/rob_impl.hh | 14 |
24 files changed, 414 insertions, 270 deletions
diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc index 08d42cd46..c563fbef3 100644 --- a/cpu/o3/alpha_cpu_builder.cc +++ b/cpu/o3/alpha_cpu_builder.cc @@ -94,12 +94,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -109,6 +107,9 @@ Param<unsigned> squashWidth; Param<Tick> trapLatency; Param<Tick> fetchTrapLatency; +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; + Param<std::string> predType; Param<unsigned> localPredictorSize; Param<unsigned> localCtrBits; @@ -219,12 +220,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -235,6 +234,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), @@ -353,12 +355,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -368,6 +368,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->trapLatency = trapLatency; params->fetchTrapLatency = fetchTrapLatency; + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; + params->predType = predType; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh index f39fdf6b6..1bf0652cd 100644 --- a/cpu/o3/alpha_cpu_impl.hh +++ b/cpu/o3/alpha_cpu_impl.hh @@ -383,7 +383,7 @@ AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc) } // Copy the misc regs. - cpu->regFile.miscRegs[tid].copyMiscRegs(xc); + TheISA::copyMiscRegs(xc, this); // Then finally set the PC and the next PC. cpu->setPC(xc->readPC(), tid); diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index f0836a9fd..4ab130d02 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -106,12 +106,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -125,6 +123,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick fetchTrapLatency; // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + + // // Branch predictor (BP, BTB, RAS) // std::string predType; diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index d93822394..b153effc4 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -160,10 +160,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -367,11 +363,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; @@ -392,10 +383,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. */ diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 798f30294..364e685c2 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -71,12 +71,10 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -114,10 +112,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; xcSquash[i] = false; + PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template <class Impl> @@ -240,7 +236,6 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr) cpu->activateStage(FullCPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template <class Impl> @@ -299,13 +294,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template <class Impl> void -DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - -template <class Impl> -void DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) { iewStage = iew_stage; @@ -431,7 +419,7 @@ DefaultCommit<Impl>::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. @@ -536,8 +524,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template <class Impl> @@ -555,8 +541,6 @@ DefaultCommit<Impl>::squashFromXC(unsigned tid) cpu->activityThisCycle(); xcSquash[tid] = false; - - ++squashCounter; } template <class Impl> @@ -571,6 +555,9 @@ DefaultCommit<Impl>::tick() return; } + if ((*activeThreads).size() <=0) + return; + list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -582,10 +569,12 @@ DefaultCommit<Impl>::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -691,29 +680,7 @@ DefaultCommit<Impl>::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -741,8 +708,6 @@ DefaultCommit<Impl>::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; @@ -944,7 +909,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->reachedCommit = true; + head_inst->setAtCommit(); if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || @@ -1060,7 +1025,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Generate trap squash event. generateTrapEvent(tid); - +// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); return false; #else // !FULL_SYSTEM panic("fault (%d) detected @ PC %08p", inst_fault, @@ -1083,6 +1048,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->renamedDestRegIdx(i)); } + if (head_inst->isCopy()) + panic("Should not commit any copy instructions!"); + // Finally clear the head ROB entry. rob->retireHead(tid); diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 8d72bdc41..f1571e61b 100644 --- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -108,12 +108,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) // For now just have these time buffers be pretty big. // @todo: Make these time buffer sizes parameters or derived // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -180,7 +182,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); @@ -709,7 +710,7 @@ void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -758,6 +759,46 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) tickEvent.schedule(curTick); } +/* +template <class Impl> +void +FullO3CPU<Impl>::serialize(std::ostream &os) +{ + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyXC(thread[i]->getXC()); + temp.serialize(os); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyXC(thread[i]->getXC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getXC()->copyArchRegs(temp.getXC()); + } +} +*/ template <class Impl> uint64_t FullO3CPU<Impl>::readIntReg(int reg_idx) @@ -866,7 +907,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegSingle(phys_reg, val); } @@ -875,7 +917,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegDouble(phys_reg, val); } @@ -884,7 +927,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegInt(phys_reg, val); } diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh index f4b19bfb3..ef5c9ae53 100644 --- a/cpu/o3/cpu.hh +++ b/cpu/o3/cpu.hh @@ -63,6 +63,12 @@ class BaseFullCPU : public BaseCPU void regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ + int readCpuId() { return cpu_id; } + protected: int cpu_id; }; diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index 0b686375e..e1af4d423 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -278,7 +278,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; toFetch->decodeInfo[tid].squash = true; - toFetch->decodeInfo[tid].nextPC = inst->readNextPC(); + toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); @@ -294,7 +294,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid && fromFetch->insts[i]->seqNum > inst->seqNum) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); } } @@ -343,7 +343,7 @@ DefaultDecode<Impl>::squash(unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); squash_count++; } } @@ -721,9 +721,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid) // Go ahead and compute any PC-relative branches. if (inst->isDirectCtrl() && inst->isUncondCtrl()) { ++decodeBranchResolved; - inst->setNextPC(inst->branchTarget()); - if (inst->mispredicted()) { + if (inst->branchTarget() != inst->readPredTarg()) { ++decodeBranchMispred; // Might want to set some sort of boolean and just do diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index 92a87ab54..0bde56ce9 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -358,6 +358,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index a309bd49a..cc09c4a41 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -138,6 +138,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -334,6 +336,7 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req) // Wake up the CPU (if it went to sleep and was waiting on this completion // event). cpu->wakeCPU(); + cacheDataValid[tid] = true; DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", tid); @@ -466,7 +469,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (interruptPending && flags == 0 || switchedOut) { + if (interruptPending && flags == 0) { // Hold off fetch from getting new instructions while an interrupt // is pending. return false; @@ -475,6 +478,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. memReq[tid] = new MemReq(); @@ -525,6 +533,9 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid MemAccessResult result = icacheInterface->access(memReq[tid]); + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; + fetchedCacheLines++; // If the cache missed, then schedule an event to wake @@ -1002,8 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", - curTick); +// warn("%lli: Quiesce instruction encountered, halting fetch!", +// curTick); fetchStatus[tid] = QuiescePending; ++numInst; status_change = true; @@ -1067,7 +1078,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); +// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM fatal("fault (%d) detected @ PC %08p", fault, PC[tid]); #endif // FULL_SYSTEM diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index eda6a6bc0..d21c573fe 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -224,6 +224,47 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding <= wbMax); +#ifdef DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding >= 0); +#ifdef DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#ifdef DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -281,6 +322,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -401,20 +445,12 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. - */ - unsigned executeWidth; - /** Index into queue of instructions being written back. */ unsigned wbNumInst; @@ -425,6 +461,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + int wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; @@ -459,14 +506,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -476,28 +515,27 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of executed store instructions. */ +// Stats::Scalar<> iewExecStoreInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. */ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. */ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -510,7 +548,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. */ diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index 3ed20cb75..102be4f8d 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -56,9 +56,11 @@ DefaultIEW<Impl>::LdWritebackEvent::process() //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (iewStage->isSwitchedOut()) { + iewStage->decrWb(inst->seqNum); inst = NULL; return; } else if (inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); iewStage->wakeCPU(); inst = NULL; return; @@ -93,16 +95,17 @@ DefaultIEW<Impl>::LdWritebackEvent::description() template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) : // @todo: Make this into a parameter. - issueToExecQueue(5, 5), + issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -125,8 +128,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -144,6 +151,7 @@ DefaultIEW<Impl>::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -189,20 +197,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -221,47 +215,49 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".iewExecLoadInsts") + .desc("Number of load instructions executed") + .flags(total); + + iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; + .flags(total); + iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; /* for (int i=0; i<Num_OpClasses; ++i) { stringstream subname; @@ -277,58 +273,50 @@ DefaultIEW<Impl>::regStats() .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -481,8 +469,7 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; - // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -515,16 +502,7 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template<class Impl> @@ -650,14 +628,16 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) <= wbMax); } + DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n", + wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst); // Add finished instruction to queue to commit. (*iewQueue)[wbCycle].insts[wbNumInst] = inst; (*iewQueue)[wbCycle].size++; @@ -670,7 +650,7 @@ DefaultIEW<Impl>::validInstsFromRename() unsigned inst_count = 0; for (int i=0; i<fromRename->size; i++) { - if (!fromRename->insts[i]->squashed) + if (!fromRename->insts[i]->isSquashed()) inst_count++; } @@ -858,10 +838,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -912,6 +894,22 @@ DefaultIEW<Impl>::sortInsts() template <class Impl> void +DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + +template <class Impl> +void DefaultIEW<Impl>::wakeCPU() { cpu->wakeCPU(); @@ -1010,7 +1008,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1149,7 +1147,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1263,6 +1261,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1399,8 +1398,8 @@ DefaultIEW<Impl>::writebackInsts() DynInstPtr inst = toCommit->insts[inst_num]; int tid = inst->threadNumber; - DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n", - inst->readPC()); + DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); iewInstsToCommit[tid]++; @@ -1425,6 +1424,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } @@ -1561,7 +1562,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else iewExecutedInsts++; #else @@ -1572,13 +1573,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 4802cbaf4..80cd71f0d 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -490,8 +490,6 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; /** Number of times the FU was busy. */ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. */ diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index d677a259c..72cb0d708 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -288,22 +288,7 @@ InstructionQueue<Impl>::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") @@ -700,6 +685,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -790,13 +776,14 @@ InstructionQueue<Impl>::scheduleReadyInsts() // complete. ++freeEntries; count[tid]--; - issuing_inst->removeInIQ(); + issuing_inst->clearInIQ(); } else { memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; @@ -1096,7 +1083,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid) // inst will flow through the rest of the pipeline. squashed_inst->setIssued(); squashed_inst->setCanCommit(); - squashed_inst->removeInIQ(); + squashed_inst->clearInIQ(); //Update Thread IQ Count count[squashed_inst->threadNumber]--; diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh index b321d4590..c67225bc0 100644 --- a/cpu/o3/lsq.hh +++ b/cpu/o3/lsq.hh @@ -62,6 +62,9 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers the statistics for each LSQ Unit. */ + void regStats(); + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh index a6ad27522..a8a55af1a 100644 --- a/cpu/o3/lsq_impl.hh +++ b/cpu/o3/lsq_impl.hh @@ -106,6 +106,16 @@ LSQ<Impl>::name() const template<class Impl> void +LSQ<Impl>::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template<class Impl> +void LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) { activeThreads = at_ptr; diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index a6afff743..fe174a97d 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -101,6 +101,9 @@ class LSQUnit { /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } @@ -153,9 +156,6 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -369,25 +369,34 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; + /** Total number of loads forwaded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; + + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; + + /** Total number of squashed loads. */ + Stats::Scalar<> lsqSquashedLoads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Total number of squashed stores. */ + Stats::Scalar<> lsqSquashedStores; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; + + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; + + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template <class T> @@ -441,8 +450,9 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->flags & UNCACHEABLE && - (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) { + (load_idx != loadHead || !loadQueue[load_idx]->isAtCommit())) { iewStage->rescheduleMemInst(loadQueue[load_idx]); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -552,6 +562,8 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(loadQueue[load_idx]); + iewStage->decrWb(loadQueue[load_idx]->seqNum); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. @@ -559,6 +571,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->vaddr); + ++lsqBlockedLoads; return NoFault; } } @@ -579,6 +592,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) // if we have a cache, do cache access too if (fault == NoFault && dcacheInterface) { if (dcacheInterface->isBlocked()) { + ++lsqCacheBlocked; + + iewStage->decrWb(inst->seqNum); + // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) return NoFault; diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 4ee8bb234..5cc3078f8 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -126,6 +126,47 @@ LSQUnit<Impl>::name() const template<class Impl> void +LSQUnit<Impl>::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + +template<class Impl> +void LSQUnit<Impl>::clearLQ() { loadQueue.clear(); @@ -548,6 +589,7 @@ LSQUnit<Impl>::writebackStores() if (dcacheInterface && dcacheInterface->isBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); + ++lsqCacheBlocked; break; } @@ -705,7 +747,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; + loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; --loads; @@ -748,7 +790,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst->setSquashed(); storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; @@ -765,6 +807,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } diff --git a/cpu/o3/mem_dep_unit.cc b/cpu/o3/mem_dep_unit.cc index ccdd1a515..b0f91d44f 100644 --- a/cpu/o3/mem_dep_unit.cc +++ b/cpu/o3/mem_dep_unit.cc @@ -35,6 +35,7 @@ // AlphaSimpleImpl. template class MemDepUnit<StoreSet, AlphaSimpleImpl>; +#ifdef DEBUG template <> int MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0; @@ -44,3 +45,4 @@ MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0; template <> int MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0; +#endif diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh index 595e9293f..bfe694bd8 100644 --- a/cpu/o3/mem_dep_unit_impl.hh +++ b/cpu/o3/mem_dep_unit_impl.hh @@ -59,7 +59,9 @@ MemDepUnit<MemDepPred, Impl>::~MemDepUnit() } } +#ifdef DEBUG assert(MemDepEntry::memdep_count == 0); +#endif } template <class MemDepPred, class Impl> @@ -141,7 +143,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) // Add the MemDepEntry to the hash. memDepHash.insert( std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif instList[tid].push_back(inst); @@ -227,7 +231,9 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst) // Insert the MemDepEntry into the hash. memDepHash.insert( std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif // Add the instruction to the list. instList[tid].push_back(inst); @@ -275,7 +281,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst) // Add the MemDepEntry to the hash. memDepHash.insert( std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif // Add the instruction to the instruction list. instList[tid].push_back(barr_inst); @@ -375,7 +383,9 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst) (*hash_it).second = NULL; memDepHash.erase(hash_it); +#ifdef DEBUG MemDepEntry::memdep_erase++; +#endif } template <class MemDepPred, class Impl> @@ -470,7 +480,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num, (*hash_it).second = NULL; memDepHash.erase(hash_it); +#ifdef DEBUG MemDepEntry::memdep_erase++; +#endif instList[tid].erase(squash_it--); } @@ -551,5 +563,7 @@ MemDepUnit<MemDepPred, Impl>::dumpLists() cprintf("Memory dependence hash size: %i\n", memDepHash.size()); +#ifdef DEBUG cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); +#endif } diff --git a/cpu/o3/regfile.hh b/cpu/o3/regfile.hh index ed1238d36..76c43d3a1 100644 --- a/cpu/o3/regfile.hh +++ b/cpu/o3/regfile.hh @@ -223,10 +223,10 @@ class PhysRegFile public: /** (signed) integer register file. */ - std::vector<IntReg> intRegFile; + IntReg *intRegFile; /** Floating point register file. */ - std::vector<FloatReg> floatRegFile; + FloatReg *floatRegFile; /** Miscellaneous register file. */ MiscRegFile miscRegs[Impl::MaxThreads]; @@ -256,11 +256,15 @@ PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs, : numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs) { - intRegFile.resize(numPhysicalIntRegs); - floatRegFile.resize(numPhysicalFloatRegs); + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; - //memset(intRegFile, 0, sizeof(*intRegFile)); - //memset(floatRegFile, 0, sizeof(*floatRegFile)); + for (int i = 0; i < Impl::MaxThreads; ++i) { + miscRegs[i].clear(); + } + + memset(intRegFile, 0, sizeof(*intRegFile)); + memset(floatRegFile, 0, sizeof(*floatRegFile)); } #endif diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index 829c99584..93f5b3504 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -348,7 +348,7 @@ DefaultRename<Impl>::squash(unsigned tid) for (int i=0; i<fromDecode->size; i++) { if (fromDecode->insts[i]->threadNumber == tid) { - fromDecode->insts[i]->squashed = true; + fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } @@ -1029,7 +1029,7 @@ DefaultRename<Impl>::validInsts() unsigned inst_count = 0; for (int i=0; i<fromDecode->size; i++) { - if (!fromDecode->insts[i]->squashed) + if (!fromDecode->insts[i]->isSquashed()) inst_count++; } diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh index bdbdde32f..2043e0b34 100644 --- a/cpu/o3/rob.hh +++ b/cpu/o3/rob.hh @@ -305,7 +305,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. */ bool doneSquashing[Impl::MaxThreads]; diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh index 25e0c80fd..62c4d9cf7 100644 --- a/cpu/o3/rob_impl.hh +++ b/cpu/o3/rob_impl.hh @@ -38,10 +38,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -274,7 +274,7 @@ ROB<Impl>::retireHead(unsigned tid) --numInstsInROB; --threadEntries[tid]; - head_inst->removeInROB(); + head_inst->clearInROB(); head_inst->setCommitted(); instList[tid].erase(head_it); @@ -349,11 +349,11 @@ void ROB<Impl>::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -368,7 +368,7 @@ ROB<Impl>::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -405,7 +405,7 @@ ROB<Impl>::doSquash(unsigned tid) // Check if ROB is done squashing. - if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -517,7 +517,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); |