diff options
-rw-r--r-- | cpu/o3/dep_graph.hh | 213 | ||||
-rw-r--r-- | cpu/o3/iew.hh | 59 | ||||
-rw-r--r-- | cpu/o3/iew_impl.hh | 152 | ||||
-rw-r--r-- | cpu/o3/inst_queue.cc | 4 | ||||
-rw-r--r-- | cpu/o3/inst_queue.hh | 114 | ||||
-rw-r--r-- | cpu/o3/inst_queue_impl.hh | 480 |
6 files changed, 469 insertions, 553 deletions
diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh new file mode 100644 index 000000000..f8ae38da4 --- /dev/null +++ b/cpu/o3/dep_graph.hh @@ -0,0 +1,213 @@ + +#ifndef __CPU_O3_DEP_GRAPH_HH__ +#define __CPU_O3_DEP_GRAPH_HH__ + +#include "cpu/o3/comm.hh" + +template <class DynInstPtr> +class DependencyEntry +{ + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry<DynInstPtr> *next; +}; + +template <class DynInstPtr> +class DependencyGraph +{ + public: + typedef DependencyEntry<DynInstPtr> DepEntry; + + DependencyGraph() + : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) + { } + + void resize(int num_entries); + + void reset(); + + void insert(PhysRegIndex idx, DynInstPtr &new_inst); + + void setInst(PhysRegIndex idx, DynInstPtr &new_inst) + { dependGraph[idx].inst = new_inst; } + + void clearInst(PhysRegIndex idx) + { dependGraph[idx].inst = NULL; } + + void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + + DynInstPtr pop(PhysRegIndex idx); + + bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } + + /** Debugging function to dump out the dependency graph. + */ + void dump(); + + private: + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DepEntry *dependGraph; + + int numEntries; + + // Debug variable, remove when done testing. + unsigned memAllocCounter; + + public: + uint64_t nodesTraversed; + uint64_t nodesRemoved; +}; + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::resize(int num_entries) +{ + numEntries = num_entries; + dependGraph = new DepEntry[numEntries]; +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::reset() +{ + // Clear the dependency graph + DepEntry *curr; + DepEntry *prev; + + for (int i = 0; i < numEntries; ++i) { + curr = dependGraph[i].next; + + while (curr) { + memAllocCounter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DepEntry *new_entry = new DepEntry; + new_entry->next = dependGraph[idx].next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + dependGraph[idx].next = new_entry; + + ++memAllocCounter; +} + + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx, + DynInstPtr &inst_to_remove) +{ + DepEntry *prev = &dependGraph[idx]; + DepEntry *curr = dependGraph[idx].next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + nodesRemoved++; + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + nodesTraversed++; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --memAllocCounter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template <class DynInstPtr> +DynInstPtr +DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx) +{ + DepEntry *node; + node = dependGraph[idx].next; + DynInstPtr inst = NULL; + if (node) { + inst = node->inst; + dependGraph[idx].next = node->next; + node->inst = NULL; + memAllocCounter--; + delete node; + } + return inst; +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::dump() +{ + DepEntry *curr; + + for (int i = 0; i < numEntries; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } + cprintf("memAllocCounter: %i\n", memAllocCounter); +} + +#endif // __CPU_O3_DEP_GRAPH_HH__ diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index 72be25668..935320628 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,20 +41,23 @@ class FUPool; /** - * DefaultIEW handles both single threaded and SMT IEW(issue/execute/writeback). - * It handles the dispatching of instructions to the LSQ/IQ as part of the issue - * stage, and has the IQ try to issue instructions each cycle. The execute - * latency is actually tied into the issue latency to allow the IQ to be able to + * DefaultIEW handles both single threaded and SMT IEW + * (issue/execute/writeback). It handles the dispatching of + * instructions to the LSQ/IQ as part of the issue stage, and has the + * IQ try to issue instructions each cycle. The execute latency is + * actually tied into the issue latency to allow the IQ to be able to * do back-to-back scheduling without having to speculatively schedule - * instructions. This happens by having the IQ have access to the functional - * units, and the IQ gets the execution latencies from the FUs when it issues - * instructions. Instructions reach the execute stage on the last cycle of - * their execution, which is when the IQ knows to wake up any dependent - * instructions, allowing back to back scheduling. The execute portion of IEW - * separates memory instructions from non-memory instructions, either telling - * the LSQ to execute the instruction, or executing the instruction directly. - * The writeback portion of IEW completes the instructions by waking up any - * dependents, and marking the register ready on the scoreboard. + * instructions. This happens by having the IQ have access to the + * functional units, and the IQ gets the execution latencies from the + * FUs when it issues instructions. Instructions reach the execute + * stage on the last cycle of their execution, which is when the IQ + * knows to wake up any dependent instructions, allowing back to back + * scheduling. The execute portion of IEW separates memory + * instructions from non-memory instructions, either telling the LSQ + * to execute the instruction, or executing the instruction directly. + * The writeback portion of IEW completes the instructions by waking + * up any dependents, and marking the register ready on the + * scoreboard. */ template<class Impl> class DefaultIEW @@ -214,10 +217,8 @@ class DefaultIEW /** Tells CPU that the IEW stage is inactive and idle. */ inline void deactivateStage(); -//#if !FULL_SYSTEM /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } -//#endif private: /** Sends commit proper information for a squash due to a branch @@ -469,10 +470,10 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; - Stats::Vector<> exe_swp; - Stats::Vector<> exe_nop; - Stats::Vector<> exe_refs; - Stats::Vector<> exe_branches; + Stats::Vector<> exeSwp; + Stats::Vector<> exeNop; + Stats::Vector<> exeRefs; + Stats::Vector<> exeBranches; // Stats::Vector<> issued_ops; /* @@ -481,20 +482,20 @@ class DefaultIEW Stats::Vector<> dist_unissued; Stats::Vector2d<> stat_issued_inst_type; */ - Stats::Formula issue_rate; + Stats::Formula issueRate; Stats::Formula iewExecStoreInsts; // Stats::Formula issue_op_rate; // Stats::Formula fu_busy_rate; Stats::Vector<> iewInstsToCommit; - Stats::Vector<> writeback_count; - Stats::Vector<> producer_inst; - Stats::Vector<> consumer_inst; - Stats::Vector<> wb_penalized; - - Stats::Formula wb_rate; - Stats::Formula wb_fanout; - Stats::Formula wb_penalized_rate; + Stats::Vector<> writebackCount; + Stats::Vector<> producerInst; + Stats::Vector<> consumerInst; + Stats::Vector<> wbPenalized; + + Stats::Formula wbRate; + Stats::Formula wbFanout; + Stats::Formula wbPenalizedRate; }; #endif // __CPU_O3_IEW_HH__ diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index cbd7396f7..59f4055a6 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -69,7 +69,7 @@ DefaultIEW<Impl>::LdWritebackEvent::process() if (!inst->isExecuted()) { inst->setExecuted(); - // Execute again to copy data to proper place. + // Complete access to copy data to proper place. if (inst->isStore()) { inst->completeAcc(); } @@ -78,7 +78,6 @@ DefaultIEW<Impl>::LdWritebackEvent::process() // Need to insert instruction into queue to commit iewStage->instToCommit(inst); - //wroteToTimeBuffer = true; iewStage->activityThisCycle(); inst = NULL; @@ -93,8 +92,7 @@ DefaultIEW<Impl>::LdWritebackEvent::description() template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // Just make this time buffer really big for now - // @todo: Make this into a parameter. + : // @todo: Make this into a parameter. issueToExecQueue(5, 5), instQueue(params), ldstQueue(params), @@ -108,7 +106,6 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) numThreads(params->numberOfThreads), switchedOut(false) { - DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth); _status = Active; exeStatus = Running; wbStatus = Idle; @@ -130,7 +127,6 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) updateLSQNextCycle = false; - // @todo: Make into a parameter skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -149,8 +145,6 @@ DefaultIEW<Impl>::regStats() instQueue.regStats(); - //ldstQueue.regStats(); - iewIdleCycles .name(name() + ".iewIdleCycles") .desc("Number of cycles IEW is idle"); @@ -167,8 +161,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewUnblockCycles") .desc("Number of cycles IEW is unblocking"); -// iewWBInsts; - iewDispatchedInsts .name(name() + ".iewDispatchedInsts") .desc("Number of instructions dispatched to IQ"); @@ -206,11 +198,7 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewExecLoadInsts") .desc("Number of load instructions executed") .flags(total); -/* - iewExecStoreInsts - .name(name() + ".iewExecStoreInsts") - .desc("Number of store instructions executed"); -*/ + iewExecSquashedInsts .name(name() + ".iewExecSquashedInsts") .desc("Number of squashed instructions skipped in execute"); @@ -233,47 +221,47 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exe_swp + exeSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") .flags(total) ; - exe_nop + exeNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") .flags(total) ; - exe_refs + exeRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") .flags(total) ; - exe_branches + exeBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") .flags(total) ; - issue_rate + issueRate .name(name() + ".EXEC:rate") .desc("Inst execution rate") .flags(total) ; - issue_rate = iewExecutedInsts / cpu->numCycles; + issueRate = iewExecutedInsts / cpu->numCycles; iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") .flags(total) ; - iewExecStoreInsts = exe_refs - iewExecLoadInsts; + iewExecStoreInsts = exeRefs - iewExecLoadInsts; /* for (int i=0; i<Num_OpClasses; ++i) { stringstream subname; @@ -292,56 +280,56 @@ DefaultIEW<Impl>::regStats() .flags(total) ; - writeback_count + writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") .flags(total) ; - producer_inst + producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") .flags(total) ; - consumer_inst + consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") .flags(total) ; - wb_penalized + wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") .flags(total) ; - wb_penalized_rate + wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") .flags(total) ; - wb_penalized_rate = wb_penalized / writeback_count; + wbPenalizedRate = wbPenalized / writebackCount; - wb_fanout + wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") .flags(total) ; - wb_fanout = producer_inst / consumer_inst; + wbFanout = producerInst / consumerInst; - wb_rate + wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") .flags(total) ; - wb_rate = writeback_count / cpu->numCycles; + wbRate = writebackCount / cpu->numCycles; } template<class Impl> @@ -507,7 +495,7 @@ DefaultIEW<Impl>::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. - ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum,tid); + ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); updatedQueues = true; @@ -543,18 +531,15 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid) DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, PC: %#x " "[sn:%i].\n", tid, inst->readPC(), inst->seqNum); - // Tell rename to squash through the time buffer. toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->mispredPC[tid] = inst->readPC(); toCommit->nextPC[tid] = inst->readNextPC(); toCommit->branchMispredict[tid] = true; - // Prediction was incorrect, so send back inverse. toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); toCommit->includeSquashInst[tid] = false; - //toCommit->iewSquashNum[tid] = inst->seqNum; wroteToTimeBuffer = true; } @@ -566,13 +551,11 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid) DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, " "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum); - // Tell rename to squash through the time buffer. toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readNextPC(); toCommit->includeSquashInst[tid] = false; - //toCommit->iewSquashNum[tid] = inst->seqNum; wroteToTimeBuffer = true; } @@ -611,7 +594,6 @@ DefaultIEW<Impl>::block(unsigned tid) // reprocessed when this stage unblocks. skidInsert(tid); - // Set the status to Blocked. dispatchStatus[tid] = Blocked; } @@ -661,10 +643,7 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // to. If there are free write ports at the time, then go ahead // and write the instruction to that time. If there are not, // keep looking back to see where's the first time there's a - // free slot. What happens if you run out of free spaces? - // For now naively assume that all instructions take one cycle. - // Otherwise would have to look into the time buffer based on the - // latency of the instruction. + // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; if (wbNumInst == issueWidth) { @@ -918,10 +897,10 @@ void DefaultIEW<Impl>::sortInsts() { int insts_from_rename = fromRename->size; - +#ifdef DEBUG for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); - +#endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); } @@ -1047,9 +1026,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Be sure to mark these instructions as ready so that the // commit stage can go ahead and execute them, and mark // them as issued so the IQ doesn't reprocess them. - // ------------- - // @TODO: What happens if the ldstqueue is full? - // Do we process the other instructions? // Check for squashed instructions. if (inst->isSquashed()) { @@ -1125,6 +1101,9 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) ++iewDispStoreInsts; if (inst->isNonSpeculative()) { + // Non-speculative stores (namely store conditionals) + // need to be set as "canCommit()" so that commit can + // process them when they reach the head of commit. inst->setCanCommit(); instQueue.insertNonSpec(inst); add_to_iq = false; @@ -1137,6 +1116,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) toRename->iewInfo[tid].dispatchedToLSQ++; #if FULL_SYSTEM } else if (inst->isMemBarrier() || inst->isWriteBarrier()) { + // Same as non-speculative stores. inst->setCanCommit(); instQueue.insertBarrier(inst); add_to_iq = false; @@ -1145,7 +1125,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " "encountered, skipping.\n", tid); - // Same hack as with stores. + // Same as non-speculative stores. inst->setCanCommit(); // Specifically insert it as nonspeculative. @@ -1162,9 +1142,9 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) inst->setExecuted(); inst->setCanCommit(); - instQueue.advanceTail(inst); + instQueue.recordProducer(inst); - exe_nop[tid]++; + exeNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1175,7 +1155,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) inst->setIssued(); inst->setCanCommit(); - instQueue.advanceTail(inst); + instQueue.recordProducer(inst); add_to_iq = false; } else { @@ -1237,7 +1217,6 @@ template <class Impl> void DefaultIEW<Impl>::executeInsts() { - //bool fetch_redirect[(*activeThreads).size()]; wbNumInst = 0; wbCycle = 0; @@ -1254,20 +1233,17 @@ DefaultIEW<Impl>::executeInsts() // Execute/writeback any instructions that are available. int inst_num = 0; - for ( ; inst_num < issueWidth && /* Haven't exceeded issue bandwidth */ - fromIssue->insts[inst_num]; - ++inst_num) { + for ( ; inst_num < issueWidth && fromIssue->insts[inst_num]; + ++inst_num) { DPRINTF(IEW, "Execute: Executing instructions from IQ.\n"); - // Get instruction from issue's queue. DynInstPtr inst = fromIssue->insts[inst_num]; DPRINTF(IEW, "Execute: Processing PC %#x, [tid:%i] [sn:%i].\n", inst->readPC(), inst->threadNumber,inst->seqNum); // Check if the instruction is squashed; if so then skip it - // and don't count it towards the FU usage. if (inst->isSquashed()) { DPRINTF(IEW, "Execute: Instruction was squashed.\n"); @@ -1299,22 +1275,19 @@ DefaultIEW<Impl>::executeInsts() // Loads will mark themselves as executed, and their writeback // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); - -// ++iewExecLoadInsts; } else if (inst->isStore()) { ldstQueue.executeStore(inst); -// ++iewExecStoreInsts; - // If the store had a fault then it may not have a mem req if (inst->req && !(inst->req->flags & LOCKED)) { inst->setExecuted(); instToCommit(inst); } - // Store conditionals will mark themselves as executed, and - // their writeback event will add the instruction to the queue - // to commit. + + // Store conditionals will mark themselves as + // executed, and their writeback event will add the + // instruction to the queue to commit. } else { panic("Unexpected memory type!\n"); } @@ -1329,10 +1302,9 @@ DefaultIEW<Impl>::executeInsts() updateExeInstStats(inst); - // Check if branch was correct. This check happens after the - // instruction is added to the queue because even if the branch - // is mispredicted, the branch instruction itself is still valid. - // Only handle this if there hasn't already been something that + // Check if branch prediction was correct, if not then we need + // to tell commit to squash in flight instructions. Only + // handle this if there hasn't already been something that // redirects fetch in this group of instructions. // This probably needs to prioritize the redirects if a different @@ -1360,7 +1332,8 @@ DefaultIEW<Impl>::executeInsts() } else if (ldstQueue.violation(tid)) { fetchRedirect[tid] = true; - // Get the DynInst that caused the violation. Note that this + // If there was an ordering violation, then get the + // DynInst that caused the violation. Note that this // clears the violation signal. DynInstPtr violator; violator = ldstQueue.getMemDepViolator(tid); @@ -1409,13 +1382,11 @@ template <class Impl> void DefaultIEW<Impl>::writebackInsts() { - // Loop through the head of the time buffer and wake any dependents. - // These instructions are about to write back. In the simple model - // this loop can really happen within the previous loop, but when - // instructions have actual latencies, this loop must be separate. - // Also mark scoreboard that this instruction is finally complete. - // Either have IEW have direct access to rename map, or have this as - // part of backwards communication. + // Loop through the head of the time buffer and wake any + // dependents. These instructions are about to write back. Also + // mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to scoreboard, or have this + // as part of backwards communication. for (int inst_num = 0; inst_num < issueWidth && toCommit->insts[inst_num]; inst_num++) { DynInstPtr inst = toCommit->insts[inst_num]; @@ -1441,9 +1412,9 @@ DefaultIEW<Impl>::writebackInsts() scoreboard->setReg(inst->renamedDestRegIdx(i)); } - producer_inst[tid]++; - consumer_inst[tid]+= dependents; - writeback_count[tid]++; + producerInst[tid]++; + consumerInst[tid]+= dependents; + writebackCount[tid]++; } } } @@ -1452,8 +1423,6 @@ template<class Impl> void DefaultIEW<Impl>::tick() { - // Try to fill up issue queue with as many instructions as bandwidth - // allows. wbNumInst = 0; wbCycle = 0; @@ -1462,9 +1431,12 @@ DefaultIEW<Impl>::tick() sortInsts(); + // Free function units marked as being freed this cycle. + fuPool->processFreeUnits(); + list<unsigned>::iterator threads = (*activeThreads).begin(); - // Check stall and squash signals. + // Check stall and squash signals, dispatch any instructions. while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1472,7 +1444,6 @@ DefaultIEW<Impl>::tick() checkSignalsAndUpdate(tid); dispatch(tid); - } if (exeStatus != Squashing) { @@ -1502,9 +1473,6 @@ DefaultIEW<Impl>::tick() // Writeback any stores using any leftover bandwidth. ldstQueue.writebackStores(); - // Free function units marked as being freed this cycle. - fuPool->processFreeUnits(); - // Check the committed load/store signals to see if there's a load // or store to commit. Also check if it's being told to execute a // nonspeculative instruction. @@ -1557,8 +1525,6 @@ DefaultIEW<Impl>::tick() DPRINTF(IEW, "[tid:%i], Dispatch dispatched %i instructions.\n", tid, toRename->iewInfo[tid].dispatched); - - //thread_queue.pop(); } DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). " @@ -1585,7 +1551,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exe_swp[thread_number]++; + exeSwp[thread_number]++; else iewExecutedInsts++; #else @@ -1596,13 +1562,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exe_branches[thread_number]++; + exeBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exe_refs[thread_number]++; + exeRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/cpu/o3/inst_queue.cc b/cpu/o3/inst_queue.cc index 2ff2282b4..95ae2b699 100644 --- a/cpu/o3/inst_queue.cc +++ b/cpu/o3/inst_queue.cc @@ -32,7 +32,3 @@ // Force instantiation of InstructionQueue. template class InstructionQueue<AlphaSimpleImpl>; - -template<> -unsigned -InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0; diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 982294b4f..6bdf4ddc2 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dep_graph.hh" #include "encumbered/cpu/full/op_class.hh" #include "sim/host.hh" @@ -91,6 +92,8 @@ class InstructionQueue /** Pointer back to the instruction queue. */ InstructionQueue<Impl> *iqPtr; + bool freeFU; + public: /** Construct a FU completion event. */ FUCompletion(DynInstPtr &_inst, int fu_idx, @@ -98,6 +101,7 @@ class InstructionQueue virtual void process(); virtual const char *description(); + void setFreeFU() { freeFU = true; } }; /** Constructs an IQ. */ @@ -114,8 +118,6 @@ class InstructionQueue void resetState(); - void resetDependencyGraph(); - /** Sets CPU pointer. */ void setCPU(FullCPU *_cpu) { cpu = _cpu; } @@ -170,11 +172,11 @@ class InstructionQueue void insertBarrier(DynInstPtr &barr_inst); /** - * Advances the tail of the IQ, used if an instruction is not added to the - * IQ for scheduling. - * @todo: Rename this function. + * Records the instruction as the producer of a register without + * adding it to the rest of the IQ. */ - void advanceTail(DynInstPtr &inst); + void recordProducer(DynInstPtr &inst) + { addToProducers(inst); } /** Process FU completion event. */ void processFUCompletion(DynInstPtr &inst, int fu_idx); @@ -224,9 +226,6 @@ class InstructionQueue /** Returns the number of used entries for a thread. */ unsigned getCount(unsigned tid) { return count[tid]; }; - /** Updates the number of free entries. */ - void updateFreeEntries(int num) { freeEntries += num; } - /** Debug function to print all instructions. */ void printInsts(); @@ -286,15 +285,6 @@ class InstructionQueue } }; - /** - * Struct for an IQ entry. It includes the instruction and an iterator - * to the instruction's spot in the IQ. - */ - struct IQEntry { - DynInstPtr inst; - ListIt iqIt; - }; - typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue; @@ -309,7 +299,6 @@ class InstructionQueue * inside of DynInst), when these instructions are woken up only * the sequence number will be available. Thus it is most efficient to be * able to search by the sequence number alone. - * @todo: Maybe change this to a priority queue per thread. */ std::map<InstSeqNum, DynInstPtr> nonSpecInsts; @@ -324,6 +313,9 @@ class InstructionQueue /** List that contains the age order of the oldest instruction of each * ready queue. Used to select the oldest instruction available * among op classes. + * @todo: Might be better to just move these entries around instead + * of creating new ones every time the position changes due to an + * instruction issuing. Not sure std::list supports this. */ std::list<ListOrderEntry> listOrder; @@ -346,6 +338,8 @@ class InstructionQueue */ void moveToYoungerInst(ListOrderIt age_order_it); + DependencyGraph<DynInstPtr> dependGraph; + ////////////////////////////////////// // Various parameters ////////////////////////////////////// @@ -397,57 +391,9 @@ class InstructionQueue bool switchedOut; - ////////////////////////////////// - // Variables needed for squashing - ////////////////////////////////// - /** The sequence number of the squashed instruction. */ InstSeqNum squashedSeqNum[Impl::MaxThreads]; - /** Iterator that points to the last instruction that has been squashed. - * This will not be valid unless the IQ is in the process of squashing. - */ - ListIt squashIt[Impl::MaxThreads]; - - /////////////////////////////////// - // Dependency graph stuff - /////////////////////////////////// - - class DependencyEntry - { - public: - DependencyEntry() - : inst(NULL), next(NULL) - { } - - DynInstPtr inst; - //Might want to include data about what arch. register the - //dependence is waiting on. - DependencyEntry *next; - - //This function, and perhaps this whole class, stand out a little - //bit as they don't fit a classification well. I want access - //to the underlying structure of the linked list, yet at - //the same time it feels like this should be something abstracted - //away. So for now it will sit here, within the IQ, until - //a better implementation is decided upon. - // This function probably shouldn't be within the entry... - void insert(DynInstPtr &new_inst); - - void remove(DynInstPtr &inst_to_remove); - - // Debug variable, remove when done testing. - static unsigned mem_alloc_counter; - }; - - /** Array of linked lists. Each linked list is a list of all the - * instructions that depend upon a given register. The actual - * register's index is used to index into the graph; ie all - * instructions in flight that are dependent upon r34 will be - * in the linked list of dependGraph[34]. - */ - DependencyEntry *dependGraph; - /** A cache of the recently woken registers. It is 1 if the register * has been woken up recently, and 0 if the register has been added * to the dependency graph and has not yet received its value. It @@ -456,11 +402,11 @@ class InstructionQueue */ std::vector<bool> regScoreboard; - /** Adds an instruction to the dependency graph, as a producer. */ + /** Adds an instruction to the dependency graph, as a consumer. */ bool addToDependents(DynInstPtr &new_inst); - /** Adds an instruction to the dependency graph, as a consumer. */ - void createDependency(DynInstPtr &new_inst); + /** Adds an instruction to the dependency graph, as a producer. */ + void addToProducers(DynInstPtr &new_inst); /** Moves an instruction to the ready queue if it is ready. */ void addIfReady(DynInstPtr &inst); @@ -471,10 +417,6 @@ class InstructionQueue */ int countInsts(); - /** Debugging function to dump out the dependency graph. - */ - void dumpDependGraph(); - /** Debugging function to dump all the list sizes, as well as print * out the list of nonspeculative instructions. Should not be used * in any other capacity, but it has no harmful sideaffects. @@ -490,20 +432,16 @@ class InstructionQueue Stats::Scalar<> iqInstsAdded; /** Stat for number of non-speculative instructions added. */ Stats::Scalar<> iqNonSpecInstsAdded; -// Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqInstsIssued; /** Stat for number of integer instructions issued. */ Stats::Scalar<> iqIntInstsIssued; -// Stats::Scalar<> iqFloatInstsAdded; /** Stat for number of floating point instructions issued. */ Stats::Scalar<> iqFloatInstsIssued; -// Stats::Scalar<> iqBranchInstsAdded; /** Stat for number of branch instructions issued. */ Stats::Scalar<> iqBranchInstsIssued; -// Stats::Scalar<> iqMemInstsAdded; /** Stat for number of memory instructions issued. */ Stats::Scalar<> iqMemInstsIssued; -// Stats::Scalar<> iqMiscInstsAdded; /** Stat for number of miscellaneous instructions issued. */ Stats::Scalar<> iqMiscInstsIssued; /** Stat for number of squashed instructions that were ready to issue. */ @@ -518,20 +456,20 @@ class InstructionQueue */ Stats::Scalar<> iqSquashedNonSpecRemoved; - Stats::VectorDistribution<> queue_res_dist; - Stats::Distribution<> n_issued_dist; - Stats::VectorDistribution<> issue_delay_dist; + Stats::VectorDistribution<> queueResDist; + Stats::Distribution<> numIssuedDist; + Stats::VectorDistribution<> issueDelayDist; - Stats::Vector<> stat_fu_busy; + Stats::Vector<> statFuBusy; // Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; + Stats::Vector2d<> statIssuedInstType; - Stats::Formula issue_rate; + Stats::Formula issueRate; // Stats::Formula issue_stores; // Stats::Formula issue_op_rate; - Stats::Vector<> fu_busy; //cumulative fu busy + Stats::Vector<> fuBusy; //cumulative fu busy - Stats::Formula fu_busy_rate; + Stats::Formula fuBusyRate; }; #endif //__CPU_O3_INST_QUEUE_HH__ diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index 0d9cc09f3..ed57ac257 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,14 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// Todo: -// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake -// it; either do in reverse order, or have added instructions put into a -// different ready queue that, in scheduleRreadyInsts(), gets put onto the -// normal ready queue. This would however give only a one cycle delay, -// but probably is more flexible to actually add in a delay parameter than -// just running it backwards. - #include <limits> #include <vector> @@ -49,7 +41,7 @@ InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst, int fu_idx, InstructionQueue<Impl> *iq_ptr) : Event(&mainEventQueue, Stat_Event_Pri), - inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr) + inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false) { this->setFlags(Event::AutoDelete); } @@ -58,7 +50,7 @@ template <class Impl> void InstructionQueue<Impl>::FUCompletion::process() { - iqPtr->processFUCompletion(inst, fuIdx); + iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1); inst = NULL; } @@ -93,14 +85,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) //Create an entry for each physical register within the //dependency graph. - dependGraph = new DependencyEntry[numPhysRegs]; - - // Initialize all the head pointers to point to NULL, and all the - // entries as unready. - for (int i = 0; i < numPhysRegs; ++i) { - dependGraph[i].next = NULL; - dependGraph[i].inst = NULL; - } + dependGraph.resize(numPhysRegs); // Resize the register scoreboard. regScoreboard.resize(numPhysRegs); @@ -165,10 +150,9 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) template <class Impl> InstructionQueue<Impl>::~InstructionQueue() { - resetDependencyGraph(); - assert(DependencyEntry::mem_alloc_counter == 0); - - delete [] dependGraph; + dependGraph.reset(); + cprintf("Nodes traversed: %i, removed: %i\n", + dependGraph.nodesTraversed, dependGraph.nodesRemoved); } template <class Impl> @@ -193,8 +177,6 @@ InstructionQueue<Impl>::regStats() .desc("Number of non-speculative instructions added to the IQ") .prereq(iqNonSpecInstsAdded); -// iqIntInstsAdded; - iqInstsIssued .name(name() + ".iqInstsIssued") .desc("Number of instructions issued") @@ -205,29 +187,21 @@ InstructionQueue<Impl>::regStats() .desc("Number of integer instructions issued") .prereq(iqIntInstsIssued); -// iqFloatInstsAdded; - iqFloatInstsIssued .name(name() + ".iqFloatInstsIssued") .desc("Number of float instructions issued") .prereq(iqFloatInstsIssued); -// iqBranchInstsAdded; - iqBranchInstsIssued .name(name() + ".iqBranchInstsIssued") .desc("Number of branch instructions issued") .prereq(iqBranchInstsIssued); -// iqMemInstsAdded; - iqMemInstsIssued .name(name() + ".iqMemInstsIssued") .desc("Number of memory instructions issued") .prereq(iqMemInstsIssued); -// iqMiscInstsAdded; - iqMiscInstsIssued .name(name() + ".iqMiscInstsIssued") .desc("Number of miscellaneous instructions issued") @@ -255,16 +229,16 @@ InstructionQueue<Impl>::regStats() .desc("Number of squashed non-spec instructions that were removed") .prereq(iqSquashedNonSpecRemoved); - queue_res_dist + queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") .desc("cycles from dispatch to issue") .flags(total | pdf | cdf ) ; for (int i = 0; i < Num_OpClasses; ++i) { - queue_res_dist.subname(i, opClassStrings[i]); + queueResDist.subname(i, opClassStrings[i]); } - n_issued_dist + numIssuedDist .init(0,totalWidth,1) .name(name() + ".ISSUE:issued_per_cycle") .desc("Number of insts issued each cycle") @@ -281,19 +255,19 @@ InstructionQueue<Impl>::regStats() dist_unissued.subname(i, unissued_names[i]); } */ - stat_issued_inst_type + statIssuedInstType .init(numThreads,Num_OpClasses) .name(name() + ".ISSUE:FU_type") .desc("Type of FU issued") .flags(total | pdf | dist) ; - stat_issued_inst_type.ysubnames(opClassStrings); + statIssuedInstType.ysubnames(opClassStrings); // // How long did instructions for a particular FU type wait prior to issue // - issue_delay_dist + issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") .desc("cycles from operands ready to issue") @@ -303,15 +277,15 @@ InstructionQueue<Impl>::regStats() for (int i=0; i<Num_OpClasses; ++i) { stringstream subname; subname << opClassStrings[i] << "_delay"; - issue_delay_dist.subname(i, subname.str()); + issueDelayDist.subname(i, subname.str()); } - issue_rate + issueRate .name(name() + ".ISSUE:rate") .desc("Inst issue rate") .flags(total) ; - issue_rate = iqInstsIssued / cpu->numCycles; + issueRate = iqInstsIssued / cpu->numCycles; /* issue_stores .name(name() + ".ISSUE:stores") @@ -328,29 +302,29 @@ InstructionQueue<Impl>::regStats() ; issue_op_rate = issued_ops / numCycles; */ - stat_fu_busy + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") .desc("attempts to use FU when none available") .flags(pdf | dist) ; for (int i=0; i < Num_OpClasses; ++i) { - stat_fu_busy.subname(i, opClassStrings[i]); + statFuBusy.subname(i, opClassStrings[i]); } - fu_busy + fuBusy .init(numThreads) .name(name() + ".ISSUE:fu_busy_cnt") .desc("FU busy when requested") .flags(total) ; - fu_busy_rate + fuBusyRate .name(name() + ".ISSUE:fu_busy_rate") .desc("FU busy rate (busy events/executed inst)") .flags(total) ; - fu_busy_rate = fu_busy / iqInstsIssued; + fuBusyRate = fuBusy / iqInstsIssued; for ( int i=0; i < numThreads; i++) { // Tell mem dependence unit to reg stats as well. @@ -396,35 +370,6 @@ InstructionQueue<Impl>::resetState() template <class Impl> void -InstructionQueue<Impl>::resetDependencyGraph() -{ - // Clear the dependency graph - DependencyEntry *curr; - DependencyEntry *prev; - - for (int i = 0; i < numPhysRegs; ++i) { - curr = dependGraph[i].next; - - while (curr) { - DependencyEntry::mem_alloc_counter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; - - delete prev; - } - - if (dependGraph[i].inst) { - dependGraph[i].inst = NULL; - } - - dependGraph[i].next = NULL; - } -} - -template <class Impl> -void InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr) { DPRINTF(IQ, "Setting active threads list pointer.\n"); @@ -454,7 +399,7 @@ void InstructionQueue<Impl>::switchOut() { resetState(); - resetDependencyGraph(); + dependGraph.reset(); switchedOut = true; for (int i = 0; i < numThreads; ++i) { memDepUnit[i].switchOut(); @@ -562,20 +507,15 @@ InstructionQueue<Impl>::insert(DynInstPtr &new_inst) // Make sure the instruction is valid assert(new_inst); - DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); + DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n", + new_inst->seqNum, new_inst->readPC()); - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. assert(freeEntries != 0); instList[new_inst->threadNumber].push_back(new_inst); - // Decrease the number of free entries. --freeEntries; - //Mark Instruction as in IQ new_inst->setInIQ(); // Look through its source registers (physical regs), and mark any @@ -584,21 +524,16 @@ InstructionQueue<Impl>::insert(DynInstPtr &new_inst) // Have this instruction set itself as the producer of its destination // register(s). - createDependency(new_inst); + addToProducers(new_inst); - // If it's a memory instruction, add it to the memory dependency - // unit. if (new_inst->isMemRef()) { memDepUnit[new_inst->threadNumber].insert(new_inst); } else { - // If the instruction is ready then add it to the ready list. addIfReady(new_inst); } ++iqInstsAdded; - - //Update Thread IQ Count count[new_inst->threadNumber]++; assert(freeEntries == (numEntries - countInsts())); @@ -611,30 +546,25 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst) // @todo: Clean up this code; can do it by setting inst as unable // to issue, then calling normal insert on the inst. - // Make sure the instruction is valid assert(new_inst); nonSpecInsts[new_inst->seqNum] = new_inst; - DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); + DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x " + "to the IQ.\n", + new_inst->seqNum, new_inst->readPC()); - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. assert(freeEntries != 0); instList[new_inst->threadNumber].push_back(new_inst); - // Decrease the number of free entries. --freeEntries; - //Mark Instruction as in IQ new_inst->setInIQ(); // Have this instruction set itself as the producer of its destination // register(s). - createDependency(new_inst); + addToProducers(new_inst); // If it's a memory instruction, add it to the memory dependency // unit. @@ -644,7 +574,6 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst) ++iqNonSpecInstsAdded; - //Update Thread IQ Count count[new_inst->threadNumber]++; assert(freeEntries == (numEntries - countInsts())); @@ -661,15 +590,6 @@ InstructionQueue<Impl>::insertBarrier(DynInstPtr &barr_inst) template <class Impl> void -InstructionQueue<Impl>::advanceTail(DynInstPtr &inst) -{ - // Have this instruction set itself as the producer of its destination - // register(s). - createDependency(inst); -} - -template <class Impl> -void InstructionQueue<Impl>::addToOrderList(OpClass op_class) { assert(!readyInsts[op_class].empty()); @@ -733,8 +653,15 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx) iewStage->wakeCPU(); - fuPool->freeUnit(fu_idx); + if (fu_idx > -1) + fuPool->freeUnitNextCycle(fu_idx); + // @todo: Ensure that these FU Completions happen at the beginning + // of a cycle, otherwise they could add too many instructions to + // the queue. + // @todo: This could break if there's multiple multi-cycle ops + // finishing on this cycle. Maybe implement something like + // instToCommit in iew_impl.hh. int &size = issueToExecuteQueue->access(0)->size; issueToExecuteQueue->access(0)->insts[size++] = inst; @@ -752,20 +679,6 @@ InstructionQueue<Impl>::scheduleReadyInsts() IssueStruct *i2e_info = issueToExecuteQueue->access(0); - // Will need to reorder the list if either a queue is not on the list, - // or it has an older instruction than last time. - for (int i = 0; i < Num_OpClasses; ++i) { - if (!readyInsts[i].empty()) { - if (!queueOnList[i]) { - addToOrderList(OpClass(i)); - } else if (readyInsts[i].top()->seqNum < - (*readyIt[i]).oldestInst) { - listOrder.erase(readyIt[i]); - addToOrderList(OpClass(i)); - } - } - } - // Have iterator to head of the list // While I haven't exceeded bandwidth or reached the end of the list, // Try to get a FU that can do what this op needs. @@ -779,7 +692,8 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; int exec_queue_slot = i2e_info->size; - while (exec_queue_slot < totalWidth && order_it != order_end_it) { + while (exec_queue_slot < totalWidth && total_issued < totalWidth && + order_it != order_end_it) { OpClass op_class = (*order_it).queueType; assert(!readyInsts[op_class].empty()); @@ -805,70 +719,47 @@ InstructionQueue<Impl>::scheduleReadyInsts() continue; } - int idx = fuPool->getUnit(op_class); - + int idx = -2; + int op_latency = 1; int tid = issuing_inst->threadNumber; - if (idx == -2) { - assert(op_class == No_OpClass); - - i2e_info->insts[exec_queue_slot++] = issuing_inst; - i2e_info->size++; - - DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU" - " %#x [sn:%lli]\n", - tid, issuing_inst->readPC(), - issuing_inst->seqNum); - - readyInsts[op_class].pop(); - - if (!readyInsts[op_class].empty()) { - moveToYoungerInst(order_it); - } else { - readyIt[op_class] = listOrder.end(); - queueOnList[op_class] = false; - } - - issuing_inst->setIssued(); - ++total_issued; + if (op_class != No_OpClass) { + idx = fuPool->getUnit(op_class); - if (!issuing_inst->isMemRef()) { - // Memory instructions can not be freed from the IQ until they - // complete. - ++freeEntries; - count[tid]--; - issuing_inst->removeInIQ(); - } else { - memDepUnit[tid].issue(issuing_inst); + if (idx > -1) { + op_latency = fuPool->getOpLatency(op_class); } + } - listOrder.erase(order_it++); - - stat_issued_inst_type[tid][op_class]++; - } else if (idx != -1) { - int op_latency = fuPool->getOpLatency(op_class); - + if (idx == -2 || idx != -1) { if (op_latency == 1) { i2e_info->insts[exec_queue_slot++] = issuing_inst; i2e_info->size++; - // Add the FU onto the list of FU's to be freed next cycle. - fuPool->freeUnit(idx); + // Add the FU onto the list of FU's to be freed next + // cycle if we used one. + if (idx >= 0) + fuPool->freeUnitNextCycle(idx); } else { int issue_latency = fuPool->getIssueLatency(op_class); + // Generate completion event for the FU + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); - if (issue_latency > 1) { - // Generate completion event for the FU - FUCompletion *execution = new FUCompletion(issuing_inst, - idx, this); + execution->schedule(curTick + cpu->cycles(issue_latency - 1)); - execution->schedule(curTick + cpu->cycles(issue_latency - 1)); + // @todo: Enforce that issue_latency == 1 or op_latency + if (issue_latency > 1) { + execution->setFreeFU(); } else { - i2e_info->insts[exec_queue_slot++] = issuing_inst; - i2e_info->size++; + // @todo: Not sure I'm accounting for the + // multi-cycle op in a pipelined FU properly, or + // the number of instructions issued in one cycle. +// i2e_info->insts[exec_queue_slot++] = issuing_inst; +// i2e_info->size++; // Add the FU onto the list of FU's to be freed next cycle. - fuPool->freeUnit(idx); + fuPool->freeUnitNextCycle(idx); } } @@ -900,15 +791,16 @@ InstructionQueue<Impl>::scheduleReadyInsts() } listOrder.erase(order_it++); - stat_issued_inst_type[tid][op_class]++; + statIssuedInstType[tid][op_class]++; } else { - stat_fu_busy[op_class]++; - fu_busy[tid]++; + statFuBusy[op_class]++; + fuBusy[tid]++; ++order_it; } } - n_issued_dist.sample(total_issued); + numIssuedDist.sample(total_issued); + iqInstsIssued+= total_issued; if (total_issued) { cpu->activityThisCycle(); @@ -930,10 +822,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) unsigned tid = (*inst_it).second->threadNumber; - // Mark this instruction as ready to issue. (*inst_it).second->setCanIssue(); - // Now schedule the instruction. if (!(*inst_it).second->isMemRef()) { addIfReady((*inst_it).second); } else { @@ -949,7 +839,6 @@ template <class Impl> void InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid) { - /*Need to go through each thread??*/ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", tid,inst); @@ -973,18 +862,13 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) DPRINTF(IQ, "Waking dependents of completed instruction.\n"); assert(!completed_inst->isSquashed()); - // Look at the physical destination register of the DynInst - // and look it up on the dependency graph. Then mark as ready - // any instructions within the instruction queue. - DependencyEntry *curr; - DependencyEntry *prev; // Tell the memory dependence unit to wake any dependents on this // instruction if it is a memory instruction. Also complete the memory - // instruction at this point since we know it executed fine. - // @todo: Might want to rename "completeMemInst" to - // something that indicates that it won't need to be replayed, and call - // this earlier. Might not be a big deal. + // instruction at this point since we know it executed without issues. + // @todo: Might want to rename "completeMemInst" to something that + // indicates that it won't need to be replayed, and call this + // earlier. Might not be a big deal. if (completed_inst->isMemRef()) { memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); completeMemInst(completed_inst); @@ -1010,39 +894,31 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) DPRINTF(IQ, "Waking any dependents on register %i.\n", (int) dest_reg); - //Maybe abstract this part into a function. - //Go through the dependency chain, marking the registers as ready - //within the waiting instructions. - - curr = dependGraph[dest_reg].next; + //Go through the dependency chain, marking the registers as + //ready within the waiting instructions. + DynInstPtr dep_inst = dependGraph.pop(dest_reg); - while (curr) { + while (dep_inst) { DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", - curr->inst->readPC()); + dep_inst->readPC()); // Might want to give more information to the instruction - // so that it knows which of its source registers is ready. - // However that would mean that the dependency graph entries - // would need to hold the src_reg_idx. - curr->inst->markSrcRegReady(); + // so that it knows which of its source registers is + // ready. However that would mean that the dependency + // graph entries would need to hold the src_reg_idx. + dep_inst->markSrcRegReady(); - addIfReady(curr->inst); + addIfReady(dep_inst); - DependencyEntry::mem_alloc_counter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; + dep_inst = dependGraph.pop(dest_reg); ++dependents; - - delete prev; } - // Reset the head node now that all of its dependents have been woken - // up. - dependGraph[dest_reg].next = NULL; - dependGraph[dest_reg].inst = NULL; + // Reset the head node now that all of its dependents have + // been woken up. + assert(dependGraph.empty(dest_reg)); + dependGraph.clearInst(dest_reg); // Mark the scoreboard as having that register ready. regScoreboard[dest_reg] = true; @@ -1058,6 +934,16 @@ InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst) readyInsts[op_class].push(ready_inst); + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + if (!queueOnList[op_class]) { + addToOrderList(op_class); + } else if (readyInsts[op_class].top()->seqNum < + (*readyIt[op_class]).oldestInst) { + listOrder.erase(readyIt[op_class]); + addToOrderList(op_class); + } + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " "the ready list, PC %#x opclass:%i [sn:%lli].\n", ready_inst->readPC(), op_class, ready_inst->seqNum); @@ -1114,10 +1000,6 @@ InstructionQueue<Impl>::squash(unsigned tid) // time buffer. squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; - // Setup the squash iterator to point to the tail. - squashIt[tid] = instList[tid].end(); - --squashIt[tid]; - // Call doSquash if there are insts in the IQ if (count[tid] > 0) { doSquash(tid); @@ -1131,24 +1013,25 @@ template <class Impl> void InstructionQueue<Impl>::doSquash(unsigned tid) { - // Make sure the squashed sequence number is valid. -// assert(squashedSeqNum[tid] != 0); + // Start at the tail. + ListIt squash_it = instList[tid].end(); + --squash_it; DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", tid, squashedSeqNum[tid]); // Squash any instructions younger than the squashed sequence number // given. - while (squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum[tid]) { + while (squash_it != instList[tid].end() && + (*squash_it)->seqNum > squashedSeqNum[tid]) { - DynInstPtr squashed_inst = (*squashIt[tid]); + DynInstPtr squashed_inst = (*squash_it); // Only handle the instruction if it actually is in the IQ and // hasn't already been squashed in the IQ. if (squashed_inst->threadNumber != tid || squashed_inst->isSquashedInIQ()) { - --squashIt[tid]; + --squash_it; continue; } @@ -1168,27 +1051,23 @@ InstructionQueue<Impl>::doSquash(unsigned tid) PhysRegIndex src_reg = squashed_inst->renamedSrcRegIdx(src_reg_idx); - // Only remove it from the dependency graph if it was - // placed there in the first place. - // HACK: This assumes that instructions woken up from the - // dependency chain aren't informed that a specific src - // register has become ready. This may not always be true - // in the future. - // Instead of doing a linked list traversal, we can just - // remove these squashed instructions either at issue time, - // or when the register is overwritten. The only downside - // to this is it leaves more room for error. + // Only remove it from the dependency graph if it + // was placed there in the first place. + + // Instead of doing a linked list traversal, we + // can just remove these squashed instructions + // either at issue time, or when the register is + // overwritten. The only downside to this is it + // leaves more room for error. if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && src_reg < numPhysRegs) { - dependGraph[src_reg].remove(squashed_inst); + dependGraph.remove(src_reg, squashed_inst); } ++iqSquashedOperandsExamined; } - - // Might want to remove producers as well. } else { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); @@ -1217,75 +1096,17 @@ InstructionQueue<Impl>::doSquash(unsigned tid) ++freeEntries; - if (numThreads > 1) { - DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " - "squashed.\n", - tid, squashed_inst->seqNum, squashed_inst->readPC()); - } else { - DPRINTF(IQ, "Instruction [sn:%lli] PC %#x squashed.\n", - squashed_inst->seqNum, squashed_inst->readPC()); - } + DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " + "squashed.\n", + tid, squashed_inst->seqNum, squashed_inst->readPC()); } - instList[tid].erase(squashIt[tid]--); + instList[tid].erase(squash_it--); ++iqSquashedInstsExamined; } } template <class Impl> -void -InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst) -{ - //Add this new, dependent instruction at the head of the dependency - //chain. - - // First create the entry that will be added to the head of the - // dependency chain. - DependencyEntry *new_entry = new DependencyEntry; - new_entry->next = this->next; - new_entry->inst = new_inst; - - // Then actually add it to the chain. - this->next = new_entry; - - ++mem_alloc_counter; -} - -template <class Impl> -void -InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove) -{ - DependencyEntry *prev = this; - DependencyEntry *curr = this->next; - - // Make sure curr isn't NULL. Because this instruction is being - // removed from a dependency list, it must have been placed there at - // an earlier time. The dependency chain should not be empty, - // unless the instruction dependent upon it is already ready. - if (curr == NULL) { - return; - } - - // Find the instruction to remove within the dependency linked list. - while (curr->inst != inst_to_remove) { - prev = curr; - curr = curr->next; - - assert(curr != NULL); - } - - // Now remove this instruction from the list. - prev->next = curr->next; - - --mem_alloc_counter; - - // Could push this off to the destructor of DependencyEntry - curr->inst = NULL; - - delete curr; -} - -template <class Impl> bool InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) { @@ -1313,7 +1134,7 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) "is being added to the dependency chain.\n", new_inst->readPC(), src_reg); - dependGraph[src_reg].insert(new_inst); + dependGraph.insert(src_reg, new_inst); // Change the return value to indicate that something // was added to the dependency graph. @@ -1323,7 +1144,7 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) "became ready before it reached the IQ.\n", new_inst->readPC(), src_reg); // Mark a register ready within the instruction. - new_inst->markSrcRegReady(); + new_inst->markSrcRegReady(src_reg_idx); } } } @@ -1333,12 +1154,12 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) template <class Impl> void -InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) +InstructionQueue<Impl>::addToProducers(DynInstPtr &new_inst) { - //Actually nothing really needs to be marked when an - //instruction becomes the producer of a register's value, - //but for convenience a ptr to the producing instruction will - //be placed in the head node of the dependency links. + // Nothing really needs to be marked when an instruction becomes + // the producer of a register's value, but for convenience a ptr + // to the producing instruction will be placed in the head node of + // the dependency links. int8_t total_dest_regs = new_inst->numDestRegs(); for (int dest_reg_idx = 0; @@ -1355,12 +1176,12 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) continue; } - if (dependGraph[dest_reg].next) { - dumpDependGraph(); + if (!dependGraph.empty(dest_reg)) { + dependGraph.dump(); panic("Dependency graph %i not empty!", dest_reg); } - dependGraph[dest_reg].inst = new_inst; + dependGraph.setInst(dest_reg, new_inst); // Mark the scoreboard to say it's not yet ready. regScoreboard[dest_reg] = false; @@ -1371,7 +1192,7 @@ template <class Impl> void InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) { - //If the instruction now has all of its source registers + // If the instruction now has all of its source registers // available, then add it to the list of ready instructions. if (inst->readyToIssue()) { @@ -1382,7 +1203,6 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) // Message to the mem dependence unit that this instruction has // its registers ready. - memDepUnit[inst->threadNumber].regsReady(inst); return; @@ -1395,6 +1215,16 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) inst->readPC(), op_class, inst->seqNum); readyInsts[op_class].push(inst); + + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + if (!queueOnList[op_class]) { + addToOrderList(op_class); + } else if (readyInsts[op_class].top()->seqNum < + (*readyIt[op_class]).oldestInst) { + listOrder.erase(readyIt[op_class]); + addToOrderList(op_class); + } } } @@ -1436,34 +1266,6 @@ InstructionQueue<Impl>::countInsts() template <class Impl> void -InstructionQueue<Impl>::dumpDependGraph() -{ - DependencyEntry *curr; - - for (int i = 0; i < numPhysRegs; ++i) - { - curr = &dependGraph[i]; - - if (curr->inst) { - cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", - i, curr->inst->readPC(), curr->inst->seqNum); - } else { - cprintf("dependGraph[%i]: No producer. consumer: ", i); - } - - while (curr->next != NULL) { - curr = curr->next; - - cprintf("%#x [sn:%lli] ", - curr->inst->readPC(), curr->inst->seqNum); - } - - cprintf("\n"); - } -} - -template <class Impl> -void InstructionQueue<Impl>::dumpLists() { for (int i = 0; i < Num_OpClasses; ++i) { @@ -1524,8 +1326,8 @@ InstructionQueue<Impl>::dumpInsts() cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. + // Loads that have not been marked as executed + // still count towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } |