/* * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim */ #include "config/use_checker.hh" #include "cpu/ozone/lw_back_end.hh" #include "cpu/op_class.hh" #if USE_CHECKER #include "cpu/checker/cpu.hh" #endif template void LWBackEnd::generateTrapEvent(Tick latency) { DPRINTF(BE, "Generating trap event\n"); TrapEvent *trap = new TrapEvent(this); trap->schedule(curTick + cpu->ticks(latency)); thread->trapPending = true; } template int LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps) { assert(!inst->isSquashed()); std::vector &dependents = memory_deps ? inst->getMemDeps() : inst->getDependents(); int num_outputs = dependents.size(); DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); for (int i = 0; i < num_outputs; i++) { DynInstPtr dep_inst = dependents[i]; if (!memory_deps) { dep_inst->markSrcRegReady(); } else { if (!dep_inst->isSquashed()) dep_inst->markMemInstReady(inst.get()); } DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); if (dep_inst->readyToIssue() && dep_inst->isInROB() && !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() && dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) { DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", dep_inst->seqNum); exeList.push(dep_inst); if (dep_inst->iqItValid) { DPRINTF(BE, "Removing instruction from waiting list\n"); waitingList.erase(dep_inst->iqIt); waitingInsts--; dep_inst->iqItValid = false; assert(waitingInsts >= 0); } if (dep_inst->isMemRef()) { removeWaitingMemOp(dep_inst); DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n", dep_inst->seqNum); } } } return num_outputs; } template void LWBackEnd::rescheduleMemInst(DynInstPtr &inst) { replayList.push_front(inst); } template LWBackEnd::TrapEvent::TrapEvent(LWBackEnd *_be) : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) { this->setFlags(Event::AutoDelete); } template void LWBackEnd::TrapEvent::process() { be->trapSquash = true; } template const char * LWBackEnd::TrapEvent::description() { return "Trap"; } template void LWBackEnd::replayMemInst(DynInstPtr &inst) { bool found_inst = false; while (!replayList.empty()) { exeList.push(replayList.front()); if (replayList.front() == inst) { found_inst = true; } replayList.pop_front(); } assert(found_inst); } template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0), trapSquash(false), tcSquash(false), latency(params->backEndLatency), width(params->backEndWidth), lsqLimits(params->lsqLimits), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; maxOutstandingMemOps = params->maxOutstandingMemOps; numWaitingMemOps = 0; waitingInsts = 0; switchedOut = false; switchPending = false; LSQ.setBE(this); // Setup IQ and LSQ with their parameters here. instsToDispatch = d2i.getWire(-1); instsToExecute = i2e.getWire(-1); dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; issueWidth = params->issueWidth ? params->issueWidth : width; wbWidth = params->wbWidth ? params->wbWidth : width; commitWidth = params->commitWidth ? params->commitWidth : width; LSQ.init(params, params->LQEntries, params->SQEntries, 0); dispatchStatus = Running; commitStatus = Running; } template std::string LWBackEnd::name() const { return cpu->name() + ".backend"; } template void LWBackEnd::regStats() { using namespace Stats; LSQ.regStats(); robCapEvents .init(cpu->number_of_threads) .name(name() + ".ROB:cap_events") .desc("number of cycles where ROB cap was active") .flags(total) ; robCapInstCount .init(cpu->number_of_threads) .name(name() + ".ROB:cap_inst") .desc("number of instructions held up by ROB cap") .flags(total) ; iqCapEvents .init(cpu->number_of_threads) .name(name() +".IQ:cap_events" ) .desc("number of cycles where IQ cap was active") .flags(total) ; iqCapInstCount .init(cpu->number_of_threads) .name(name() + ".IQ:cap_inst") .desc("number of instructions held up by IQ cap") .flags(total) ; exeInst .init(cpu->number_of_threads) .name(name() + ".ISSUE:count") .desc("number of insts issued") .flags(total) ; exeSwp .init(cpu->number_of_threads) .name(name() + ".ISSUE:swp") .desc("number of swp insts issued") .flags(total) ; exeNop .init(cpu->number_of_threads) .name(name() + ".ISSUE:nop") .desc("number of nop insts issued") .flags(total) ; exeRefs .init(cpu->number_of_threads) .name(name() + ".ISSUE:refs") .desc("number of memory reference insts issued") .flags(total) ; exeLoads .init(cpu->number_of_threads) .name(name() + ".ISSUE:loads") .desc("number of load insts issued") .flags(total) ; exeBranches .init(cpu->number_of_threads) .name(name() + ".ISSUE:branches") .desc("Number of branches issued") .flags(total) ; issuedOps .init(cpu->number_of_threads) .name(name() + ".ISSUE:op_count") .desc("number of insts issued") .flags(total) ; /* for (int i=0; inumber_of_threads) .name(name() + ".LSQ:forw_loads") .desc("number of loads forwarded via LSQ") .flags(total) ; invAddrLoads .init(cpu->number_of_threads) .name(name() + ".ISSUE:addr_loads") .desc("number of invalid-address loads") .flags(total) ; invAddrSwpfs .init(cpu->number_of_threads) .name(name() + ".ISSUE:addr_swpfs") .desc("number of invalid-address SW prefetches") .flags(total) ; lsqBlockedLoads .init(cpu->number_of_threads) .name(name() + ".LSQ:blocked_loads") .desc("number of ready loads not issued due to memory disambiguation") .flags(total) ; lsqInversion .name(name() + ".ISSUE:lsq_invert") .desc("Number of times LSQ instruction issued early") ; nIssuedDist .init(issueWidth + 1) .name(name() + ".ISSUE:issued_per_cycle") .desc("Number of insts issued each cycle") .flags(total | pdf | dist) ; /* issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") .desc("cycles from operands ready to issue") .flags(pdf | cdf) ; queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") .desc("cycles from dispatch to issue") .flags(total | pdf | cdf ) ; for (int i = 0; i < Num_OpClasses; ++i) { queueResDist.subname(i, opClassStrings[i]); } */ writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") .flags(total) ; producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") .flags(total) ; consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") .flags(total) ; wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") .flags(total) ; wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") .flags(total) ; wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") .flags(total) ; wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") .flags(total) ; wbRate = writebackCount / cpu->numCycles; statComInst .init(cpu->number_of_threads) .name(name() + ".COM:count") .desc("Number of instructions committed") .flags(total) ; statComSwp .init(cpu->number_of_threads) .name(name() + ".COM:swp_count") .desc("Number of s/w prefetches committed") .flags(total) ; statComRefs .init(cpu->number_of_threads) .name(name() + ".COM:refs") .desc("Number of memory references committed") .flags(total) ; statComLoads .init(cpu->number_of_threads) .name(name() + ".COM:loads") .desc("Number of loads committed") .flags(total) ; statComMembars .init(cpu->number_of_threads) .name(name() + ".COM:membars") .desc("Number of memory barriers committed") .flags(total) ; statComBranches .init(cpu->number_of_threads) .name(name() + ".COM:branches") .desc("Number of branches committed") .flags(total) ; nCommittedDist .init(0,commitWidth,1) .name(name() + ".COM:committed_per_cycle") .desc("Number of insts commited each cycle") .flags(pdf) ; // // Commit-Eligible instructions... // // -> The number of instructions eligible to commit in those // cycles where we reached our commit BW limit (less the number // actually committed) // // -> The average value is computed over ALL CYCLES... not just // the BW limited cycles // // -> The standard deviation is computed only over cycles where // we reached the BW limit // commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") .desc("number of insts not committed due to BW limits") .flags(total) ; commitEligibleSamples .name(name() + ".COM:bw_lim_events") .desc("number cycles where commit BW limit reached") ; squashedInsts .init(cpu->number_of_threads) .name(name() + ".COM:squashed_insts") .desc("Number of instructions removed from inst list") ; ROBSquashedInsts .init(cpu->number_of_threads) .name(name() + ".COM:rob_squashed_insts") .desc("Number of instructions removed from inst list when they reached the head of the ROB") ; ROBFcount .name(name() + ".ROB:full_count") .desc("number of cycles where ROB was full") ; ROBCount .init(cpu->number_of_threads) .name(name() + ".ROB:occupancy") .desc(name() + ".ROB occupancy (cumulative)") .flags(total) ; ROBFullRate .name(name() + ".ROB:full_rate") .desc("ROB full per cycle") ; ROBFullRate = ROBFcount / cpu->numCycles; ROBOccRate .name(name() + ".ROB:occ_rate") .desc("ROB occupancy rate") .flags(total) ; ROBOccRate = ROBCount / cpu->numCycles; /* ROBOccDist .init(cpu->number_of_threads,0,numROBEntries,2) .name(name() + ".ROB:occ_dist") .desc("ROB Occupancy per cycle") .flags(total | cdf) ; */ } template void LWBackEnd::setCPU(OzoneCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); checker = cpu->checker; } template void LWBackEnd::setCommBuffer(TimeBuffer *_comm) { comm = _comm; toIEW = comm->getWire(0); fromCommit = comm->getWire(-1); } #if FULL_SYSTEM template void LWBackEnd::checkInterrupts() { if (cpu->checkInterrupts && cpu->check_interrupts(tc) && !trapSquash && !tcSquash) { frontEnd->interruptPending = true; if (robEmpty() && !LSQ.hasStoresToWB()) { // Will need to squash all instructions currently in flight and have // the interrupt handler restart at the last non-committed inst. // Most of that can be handled through the trap() function. The // processInterrupts() function really just checks for interrupts // and then calls trap() if there is an interrupt present. // Not sure which thread should be the one to interrupt. For now // always do thread 0. assert(!thread->inSyscall); thread->inSyscall = true; // CPU will handle implementation of the interrupt. cpu->processInterrupts(); // Now squash or record that I need to squash this cycle. commitStatus = TrapPending; // Exit state update mode to avoid accidental updating. thread->inSyscall = false; // Generate trap squash event. generateTrapEvent(); DPRINTF(BE, "Interrupt detected.\n"); } else { DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); } } } #endif template void LWBackEnd::handleFault(Fault &fault, Tick latency) { DPRINTF(BE, "Handling fault!\n"); assert(!thread->inSyscall); thread->inSyscall = true; // Consider holding onto the trap and waiting until the trap event // happens for this to be executed. fault->invoke(thread->getTC()); // Exit state update mode to avoid accidental updating. thread->inSyscall = false; commitStatus = TrapPending; // Generate trap squash event. generateTrapEvent(latency); } template void LWBackEnd::tick() { DPRINTF(BE, "Ticking back end\n"); // Read in any done instruction information and update the IQ or LSQ. updateStructures(); if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { cpu->signalSwitched(); return; } readyInstsForCommit(); numInstsToWB.advance(); ROBCount[0]+= numInsts; wbCycle = 0; #if FULL_SYSTEM checkInterrupts(); #endif if (trapSquash) { assert(!tcSquash); squashFromTrap(); } else if (tcSquash) { squashFromTC(); } if (dispatchStatus != Blocked) { dispatchInsts(); } else { checkDispatchStatus(); } if (commitStatus != TrapPending) { executeInsts(); commitInsts(); } LSQ.writebackStores(); DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, " "LSQ loads: %i, LSQ stores: %i\n", waitingInsts, numWaitingMemOps, numInsts, LSQ.numLoads(), LSQ.numStores()); #ifdef DEBUG assert(numInsts == instList.size()); assert(waitingInsts == waitingList.size()); assert(numWaitingMemOps == waitingMemOps.size()); assert(!switchedOut); #endif } template void LWBackEnd::updateStructures() { if (fromCommit->doneSeqNum) { LSQ.commitLoads(fromCommit->doneSeqNum); LSQ.commitStores(fromCommit->doneSeqNum); } if (fromCommit->nonSpecSeqNum) { if (fromCommit->uncached) { // LSQ.executeLoad(fromCommit->lqIdx); } else { // IQ.scheduleNonSpec( // fromCommit->nonSpecSeqNum); } } } template void LWBackEnd::addToLSQ(DynInstPtr &inst) { // Do anything LSQ specific here? LSQ.insert(inst); } template void LWBackEnd::dispatchInsts() { DPRINTF(BE, "Trying to dispatch instructions.\n"); while (numInsts < numROBEntries && numWaitingMemOps < maxOutstandingMemOps) { // Get instruction from front of time buffer if (lsqLimits && LSQ.isFull()) { break; } DynInstPtr inst = frontEnd->getInst(); if (!inst) { break; } else if (inst->isSquashed()) { continue; } ++numInsts; instList.push_front(inst); inst->setInROB(); DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", inst->seqNum, inst->readPC()); for (int i = 0; i < inst->numDestRegs(); ++i) renameTable[inst->destRegIdx(i)] = inst; if (inst->isMemBarrier() || inst->isWriteBarrier()) { if (memBarrier) { DPRINTF(BE, "Instruction [sn:%lli] is waiting on " "barrier [sn:%lli].\n", inst->seqNum, memBarrier->seqNum); memBarrier->addMemDependent(inst); inst->addSrcMemInst(memBarrier); } memBarrier = inst; inst->setCanCommit(); } else if (inst->readyToIssue() && !inst->isNonSpeculative() && !inst->isStoreConditional()) { if (inst->isMemRef()) { LSQ.insert(inst); if (memBarrier) { DPRINTF(BE, "Instruction [sn:%lli] is waiting on " "barrier [sn:%lli].\n", inst->seqNum, memBarrier->seqNum); memBarrier->addMemDependent(inst); inst->addSrcMemInst(memBarrier); addWaitingMemOp(inst); waitingList.push_front(inst); inst->iqIt = waitingList.begin(); inst->iqItValid = true; waitingInsts++; } else { DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " "exeList.\n", inst->seqNum); exeList.push(inst); } } else if (inst->isNop()) { DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n", inst->seqNum); inst->setIssued(); inst->setExecuted(); inst->setCanCommit(); numInstsToWB[0]++; } else { DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " "exeList.\n", inst->seqNum); exeList.push(inst); } } else { if (inst->isNonSpeculative() || inst->isStoreConditional()) { inst->setCanCommit(); DPRINTF(BE, "Adding non speculative instruction\n"); } if (inst->isMemRef()) { addWaitingMemOp(inst); LSQ.insert(inst); if (memBarrier) { memBarrier->addMemDependent(inst); inst->addSrcMemInst(memBarrier); DPRINTF(BE, "Instruction [sn:%lli] is waiting on " "barrier [sn:%lli].\n", inst->seqNum, memBarrier->seqNum); } } DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " "waitingList.\n", inst->seqNum); waitingList.push_front(inst); inst->iqIt = waitingList.begin(); inst->iqItValid = true; waitingInsts++; } } // Check if IQ or LSQ is full. If so we'll need to break and stop // removing instructions. Also update the number of insts to remove // from the queue. Check here if we don't care about exact stall // conditions. /* bool stall = false; if (IQ.isFull()) { DPRINTF(BE, "IQ is full!\n"); stall = true; } else if (LSQ.isFull()) { DPRINTF(BE, "LSQ is full!\n"); stall = true; } else if (isFull()) { DPRINTF(BE, "ROB is full!\n"); stall = true; ROB_fcount++; } if (stall) { d2i.advance(); dispatchStall(); return; } */ } template void LWBackEnd::dispatchStall() { dispatchStatus = Blocked; if (!cpu->decoupledFrontEnd) { // Tell front end to stall here through a timebuffer, or just tell // it directly. } } template void LWBackEnd::checkDispatchStatus() { DPRINTF(BE, "Checking dispatch status\n"); assert(dispatchStatus == Blocked); if (!LSQ.isFull() && !isFull()) { DPRINTF(BE, "Dispatch no longer blocked\n"); dispatchStatus = Running; dispatchInsts(); } } template void LWBackEnd::executeInsts() { DPRINTF(BE, "Trying to execute instructions\n"); int num_executed = 0; while (!exeList.empty() && num_executed < issueWidth) { DynInstPtr inst = exeList.top(); DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", inst->seqNum, inst->readPC()); // Check if the instruction is squashed; if so then skip it // and don't count it towards the FU usage. if (inst->isSquashed()) { DPRINTF(BE, "Execute: Instruction was squashed.\n"); // Not sure how to handle this plus the method of sending # of // instructions to use. Probably will just have to count it // towards the bandwidth usage, but not the FU usage. ++num_executed; // Consider this instruction executed so that commit can go // ahead and retire the instruction. inst->setExecuted(); // Not sure if I should set this here or just let commit try to // commit any squashed instructions. I like the latter a bit more. inst->setCanCommit(); // ++iewExecSquashedInsts; exeList.pop(); continue; } Fault fault = NoFault; // Execute instruction. // Note that if the instruction faults, it will be handled // at the commit stage. if (inst->isMemRef() && (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { DPRINTF(BE, "Execute: Initiating access for memory " "reference.\n"); if (inst->isLoad()) { LSQ.executeLoad(inst); } else if (inst->isStore()) { Fault fault = LSQ.executeStore(inst); if (!inst->isStoreConditional() && fault == NoFault) { inst->setExecuted(); instToCommit(inst); } else if (fault != NoFault) { // If the instruction faulted, then we need to send it along to commit // without the instruction completing. // Send this instruction to commit, also make sure iew stage // realizes there is activity. inst->setExecuted(); instToCommit(inst); } } else { panic("Unknown mem type!"); } } else { inst->execute(); inst->setExecuted(); instToCommit(inst); } updateExeInstStats(inst); ++funcExeInst; ++num_executed; exeList.pop(); if (inst->mispredicted()) { squashDueToBranch(inst); break; } else if (LSQ.violation()) { // Get the DynInst that caused the violation. Note that this // clears the violation signal. DynInstPtr violator; violator = LSQ.getMemDepViolator(); DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " "%#x, inst PC: %#x. Addr is: %#x.\n", violator->readPC(), inst->readPC(), inst->physEffAddr); // Squash. squashDueToMemViolation(inst); } } issuedOps[0]+= num_executed; nIssuedDist[num_executed]++; } template void LWBackEnd::instToCommit(DynInstPtr &inst) { DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", inst->seqNum, inst->readPC()); if (!inst->isSquashed()) { if (inst->isExecuted()) { inst->setResultReady(); int dependents = wakeDependents(inst); if (dependents) { producerInst[0]++; consumerInst[0]+= dependents; } } } writeback.push_back(inst); numInstsToWB[0]++; writebackCount[0]++; } template void LWBackEnd::readyInstsForCommit() { for (int i = numInstsToWB[-latency]; !writeback.empty() && i; --i) { DynInstPtr inst = writeback.front(); writeback.pop_front(); if (!inst->isSquashed()) { DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", inst->seqNum, inst->readPC()); inst->setCanCommit(); } } } #if 0 template void LWBackEnd::writebackInsts() { int wb_width = wbWidth; // Using this method I'm not quite sure how to prevent an // instruction from waking its own dependents multiple times, // without the guarantee that commit always has enough bandwidth // to accept all instructions being written back. This guarantee // might not be too unrealistic. InstListIt wb_inst_it = writeback.begin(); InstListIt wb_end_it = writeback.end(); int inst_num = 0; int consumer_insts = 0; for (; inst_num < wb_width && wb_inst_it != wb_end_it; inst_num++) { DynInstPtr inst = (*wb_inst_it); // Some instructions will be sent to commit without having // executed because they need commit to handle them. // E.g. Uncached loads have not actually executed when they // are first sent to commit. Instead commit must tell the LSQ // when it's ready to execute the uncached load. if (!inst->isSquashed()) { DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", inst->seqNum, inst->readPC()); inst->setCanCommit(); inst->setResultReady(); if (inst->isExecuted()) { int dependents = wakeDependents(inst); if (dependents) { producer_inst[0]++; consumer_insts+= dependents; } } } writeback.erase(wb_inst_it++); } LSQ.writebackStores(); consumer_inst[0]+= consumer_insts; writeback_count[0]+= inst_num; } #endif template bool LWBackEnd::commitInst(int inst_num) { // Read instruction from the head of the ROB DynInstPtr inst = instList.back(); // Make sure instruction is valid assert(inst); if (!inst->readyToCommit()) return false; DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", inst->seqNum, inst->readPC()); thread->setPC(inst->readPC()); thread->setNextPC(inst->readNextPC()); inst->setAtCommit(); // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. if (!inst->isExecuted()) { if (inst->isNonSpeculative() || (inst->isStoreConditional() && inst->getFault() == NoFault) || inst->isMemBarrier() || inst->isWriteBarrier()) { #if !FULL_SYSTEM // Hack to make sure syscalls aren't executed until all stores // write back their data. This direct communication shouldn't // be used for anything other than this. if (inst_num > 0 || LSQ.hasStoresToWB()) #else if ((inst->isMemBarrier() || inst->isWriteBarrier() || inst->isQuiesce()) && LSQ.hasStoresToWB()) #endif { DPRINTF(BE, "Waiting for all stores to writeback.\n"); return false; } DPRINTF(BE, "Encountered a store or non-speculative " "instruction at the head of the ROB, PC %#x.\n", inst->readPC()); if (inst->isMemBarrier() || inst->isWriteBarrier()) { DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", inst->seqNum); assert(memBarrier); wakeDependents(inst, true); if (memBarrier == inst) memBarrier = NULL; inst->clearMemDependents(); } // Send back the non-speculative instruction's sequence number. if (inst->iqItValid) { DPRINTF(BE, "Removing instruction from waiting list\n"); waitingList.erase(inst->iqIt); inst->iqItValid = false; waitingInsts--; assert(waitingInsts >= 0); if (inst->isStore()) removeWaitingMemOp(inst); } exeList.push(inst); // Change the instruction so it won't try to commit again until // it is executed. inst->clearCanCommit(); // ++commitNonSpecStalls; return false; } else if (inst->isLoad()) { DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", inst->seqNum, inst->readPC()); // Send back the non-speculative instruction's sequence // number. Maybe just tell the lsq to re-execute the load. // Send back the non-speculative instruction's sequence number. if (inst->iqItValid) { DPRINTF(BE, "Removing instruction from waiting list\n"); waitingList.erase(inst->iqIt); inst->iqItValid = false; waitingInsts--; assert(waitingInsts >= 0); removeWaitingMemOp(inst); } replayMemInst(inst); inst->clearCanCommit(); return false; } else { panic("Trying to commit un-executed instruction " "of unknown type!\n"); } } // Not handled for now. assert(!inst->isThreadSync()); assert(inst->memDepReady()); // Stores will mark themselves as totally completed as they need // to wait to writeback to memory. @todo: Hack...attempt to fix // having the checker be forced to wait until a store completes in // order to check all of the instructions. If the store at the // head of the check list misses, but a later store hits, then // loads in the checker may see the younger store values instead // of the store they should see. Either the checker needs its own // memory (annoying to update), its own store buffer (how to tell // which value is correct?), or something else... if (!inst->isStore()) { inst->setCompleted(); } // Check if the instruction caused a fault. If so, trap. Fault inst_fault = inst->getFault(); // Use checker prior to updating anything due to traps or PC // based events. #if USE_CHECKER if (checker) { checker->verify(inst); } #endif if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", inst->seqNum, inst->readPC()); // Instruction is completed as it has a fault. inst->setCompleted(); if (LSQ.hasStoresToWB()) { DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); return false; } else if (inst_num != 0) { DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); return false; } #if USE_CHECKER else if (checker && inst->isStore()) { checker->verify(inst); } #endif thread->setInst( static_cast(inst->staticInst->machInst)); handleFault(inst_fault); return false; } int freed_regs = 0; for (int i = 0; i < inst->numDestRegs(); ++i) { DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", (int)inst->destRegIdx(i), inst->seqNum); thread->renameTable[inst->destRegIdx(i)] = inst; ++freed_regs; } #if FULL_SYSTEM if (thread->profile) { // bool usermode = // (xc->readMiscRegNoEffect(AlphaISA::IPR_DTB_CM) & 0x18) != 0; // thread->profilePC = usermode ? 1 : inst->readPC(); thread->profilePC = inst->readPC(); ProfileNode *node = thread->profile->consume(thread->getTC(), inst->staticInst); if (node) thread->profileNode = node; } #endif if (inst->traceData) { inst->traceData->setFetchSeq(inst->seqNum); inst->traceData->setCPSeq(thread->numInst); inst->traceData->finalize(); inst->traceData = NULL; } if (inst->isCopy()) panic("Should not commit any copy instructions!"); inst->clearDependents(); frontEnd->addFreeRegs(freed_regs); instList.pop_back(); --numInsts; ++thread->funcExeInst; // Maybe move this to where the fault is handled; if the fault is // handled, don't try to set this myself as the fault will set it. // If not, then I set thread->PC = thread->nextPC and // thread->nextPC = thread->nextPC + 4. thread->setPC(thread->readNextPC()); thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); updateComInstStats(inst); // Write the done sequence number here. toIEW->doneSeqNum = inst->seqNum; lastCommitCycle = curTick; #if FULL_SYSTEM int count = 0; Addr oldpc; do { if (count == 0) assert(!thread->inSyscall && !thread->trapPending); oldpc = thread->readPC(); cpu->system->pcEventQueue.service( thread->getTC()); count++; } while (oldpc != thread->readPC()); if (count > 1) { DPRINTF(BE, "PC skip function event, stopping commit\n"); tcSquash = true; return false; } #endif return true; } template void LWBackEnd::commitInsts() { // Not sure this should be a loop or not. int inst_num = 0; while (!instList.empty() && inst_num < commitWidth) { if (instList.back()->isSquashed()) { instList.back()->clearDependents(); ROBSquashedInsts[instList.back()->threadNumber]++; instList.pop_back(); --numInsts; continue; } if (!commitInst(inst_num++)) { DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC " "%#x is head of ROB and not ready\n", instList.back()->seqNum, instList.back()->readPC()); --inst_num; break; } } nCommittedDist.sample(inst_num); } template void LWBackEnd::squash(const InstSeqNum &sn) { LSQ.squash(sn); int freed_regs = 0; InstListIt insts_end_it = waitingList.end(); InstListIt insts_it = waitingList.begin(); while (insts_it != insts_end_it && (*insts_it)->seqNum > sn) { if ((*insts_it)->isSquashed()) { ++insts_it; continue; } DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n", (*insts_it)->readPC(), (*insts_it)->seqNum); if ((*insts_it)->isMemRef()) { DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n", (*insts_it)->seqNum); removeWaitingMemOp((*insts_it)); } waitingList.erase(insts_it++); waitingInsts--; } assert(waitingInsts >= 0); insts_it = instList.begin(); while (!instList.empty() && (*insts_it)->seqNum > sn) { if ((*insts_it)->isSquashed()) { panic("Instruction should not be already squashed and on list!"); ++insts_it; continue; } DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n", (*insts_it)->readPC(), (*insts_it)->seqNum); // Mark the instruction as squashed, and ready to commit so that // it can drain out of the pipeline. (*insts_it)->setSquashed(); (*insts_it)->setCanCommit(); (*insts_it)->clearInROB(); for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum); renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; ++freed_regs; } (*insts_it)->clearDependents(); squashedInsts[(*insts_it)->threadNumber]++; instList.erase(insts_it++); --numInsts; } while (memBarrier && memBarrier->seqNum > sn) { DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously " "squashed)\n", memBarrier->seqNum); memBarrier->clearMemDependents(); if (memBarrier->memDepReady()) { DPRINTF(BE, "No previous barrier\n"); memBarrier = NULL; } else { std::list &srcs = memBarrier->getMemSrcs(); memBarrier = srcs.front(); srcs.pop_front(); assert(srcs.empty()); DPRINTF(BE, "Previous barrier: [sn:%lli]\n", memBarrier->seqNum); } } insts_it = replayList.begin(); insts_end_it = replayList.end(); while (!replayList.empty() && insts_it != insts_end_it) { if ((*insts_it)->seqNum < sn) { ++insts_it; continue; } assert((*insts_it)->isSquashed()); replayList.erase(insts_it++); } frontEnd->addFreeRegs(freed_regs); } template void LWBackEnd::squashFromTC() { InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1; squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; tcSquash = false; commitStatus = Running; } template void LWBackEnd::squashFromTrap() { InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1; squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; trapSquash = false; commitStatus = Running; } template void LWBackEnd::squashDueToBranch(DynInstPtr &inst) { // Update the branch predictor state I guess DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n", inst->seqNum, inst->readNextPC()); squash(inst->seqNum); frontEnd->squash(inst->seqNum, inst->readNextPC(), true, inst->mispredicted()); } template void LWBackEnd::squashDueToMemViolation(DynInstPtr &inst) { // Update the branch predictor state I guess DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n", inst->seqNum, inst->readNextPC()); squash(inst->seqNum); frontEnd->squash(inst->seqNum, inst->readNextPC(), false, inst->mispredicted()); } template void LWBackEnd::squashDueToMemBlocked(DynInstPtr &inst) { DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); squash(inst->seqNum - 1); frontEnd->squash(inst->seqNum - 1, inst->readPC()); } template void LWBackEnd::switchOut() { switchPending = true; } template void LWBackEnd::doSwitchOut() { switchedOut = true; switchPending = false; // Need to get rid of all committed, non-speculative state and write it // to memory/TC. In this case this is stores that have committed and not // yet written back. assert(robEmpty()); assert(!LSQ.hasStoresToWB()); writeback.clear(); for (int i = 0; i < numInstsToWB.getSize() + 1; ++i) numInstsToWB.advance(); // squash(0); assert(waitingList.empty()); assert(instList.empty()); assert(replayList.empty()); assert(writeback.empty()); LSQ.switchOut(); } template void LWBackEnd::takeOverFrom(ThreadContext *old_tc) { assert(!squashPending); squashSeqNum = 0; squashNextPC = 0; tcSquash = false; trapSquash = false; numInsts = 0; numWaitingMemOps = 0; waitingMemOps.clear(); waitingInsts = 0; switchedOut = false; dispatchStatus = Running; commitStatus = Running; LSQ.takeOverFrom(old_tc); } template void LWBackEnd::updateExeInstStats(DynInstPtr &inst) { int thread_number = inst->threadNumber; // // Pick off the software prefetches // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) exeSwp[thread_number]++; else exeInst[thread_number]++; #else exeInst[thread_number]++; #endif // // Control operations // if (inst->isControl()) exeBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { exeRefs[thread_number]++; if (inst->isLoad()) exeLoads[thread_number]++; } } template void LWBackEnd::updateComInstStats(DynInstPtr &inst) { unsigned tid = inst->threadNumber; // keep an instruction count thread->numInst++; thread->numInsts++; cpu->numInst++; // // Pick off the software prefetches // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { statComSwp[tid]++; } else { statComInst[tid]++; } #else statComInst[tid]++; #endif // // Control Instructions // if (inst->isControl()) statComBranches[tid]++; // // Memory references // if (inst->isMemRef()) { statComRefs[tid]++; if (inst->isLoad()) { statComLoads[tid]++; } } if (inst->isMemBarrier()) { statComMembars[tid]++; } } template void LWBackEnd::dumpInsts() { int num = 0; int valid_num = 0; InstListIt inst_list_it = --(instList.end()); cprintf("ExeList size: %i\n", exeList.size()); cprintf("Inst list size: %i\n", instList.size()); while (inst_list_it != instList.end()) { cprintf("Instruction:%i\n", num); if (!(*inst_list_it)->isSquashed()) { if (!(*inst_list_it)->isIssued()) { ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { // Loads that have not been marked as executed still count // towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } } cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" "Issued:%i\nSquashed:%i\n", (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, (*inst_list_it)->threadNumber, (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); } cprintf("\n"); inst_list_it--; ++num; } inst_list_it = --(writeback.end()); cprintf("Writeback list size: %i\n", writeback.size()); while (inst_list_it != writeback.end()) { cprintf("Instruction:%i\n", num); if (!(*inst_list_it)->isSquashed()) { if (!(*inst_list_it)->isIssued()) { ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { // Loads that have not been marked as executed still count // towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } } cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" "Issued:%i\nSquashed:%i\n", (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, (*inst_list_it)->threadNumber, (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); } cprintf("\n"); inst_list_it--; ++num; } cprintf("Waiting list size: %i\n", waitingList.size()); inst_list_it = --(waitingList.end()); while (inst_list_it != waitingList.end()) { cprintf("Instruction:%i\n", num); if (!(*inst_list_it)->isSquashed()) { if (!(*inst_list_it)->isIssued()) { ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { // Loads that have not been marked as executed still count // towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } } cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" "Issued:%i\nSquashed:%i\n", (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, (*inst_list_it)->threadNumber, (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); } cprintf("\n"); inst_list_it--; ++num; } cprintf("waitingMemOps list size: %i\n", waitingMemOps.size()); MemIt waiting_it = waitingMemOps.begin(); while (waiting_it != waitingMemOps.end()) { cprintf("[sn:%lli] ", (*waiting_it)); waiting_it++; ++num; } cprintf("\n"); }