diff options
author | Kevin Lim <ktlim@umich.edu> | 2006-05-19 15:44:03 -0400 |
---|---|---|
committer | Kevin Lim <ktlim@umich.edu> | 2006-05-19 15:44:03 -0400 |
commit | 5df3e61f168a5dd7d86ba2f81538539622d77bd2 (patch) | |
tree | 09303f47fecd95b0aa9cc91d379adf1e2192be1a | |
parent | c4a87f874a69535f70c0f6f2733ea716e32c70cf (diff) | |
download | gem5-5df3e61f168a5dd7d86ba2f81538539622d77bd2.tar.xz |
IEW/IQ code cleanup and reorganization.
Dependecy graph code moved into its own class.
This requires the changes to the functional units, which is in the next check in.
cpu/o3/iew.hh:
cpu/o3/iew_impl.hh:
IEW and IQ code cleanup and reorganization.
cpu/o3/inst_queue.cc:
Dependency graph code moved into its own class now.
cpu/o3/inst_queue.hh:
IEW/IQ code cleanup and reorganization.
Dependecy graph code moved into its own class.
cpu/o3/inst_queue_impl.hh:
IEW/IQ code cleanup and reorganization.
Dependecy graph code moved into its own class.
Issue loop cleaned up, with completion events for functional units now used more correctly (before they weren't used for multi-cycle ops with pipelined FU's).
--HG--
extra : convert_revision : 35e50192df6f71dc81d46a73fdd65f7ec07c10e4
-rw-r--r-- | cpu/o3/dep_graph.hh | 213 | ||||
-rw-r--r-- | cpu/o3/iew.hh | 59 | ||||
-rw-r--r-- | cpu/o3/iew_impl.hh | 152 | ||||
-rw-r--r-- | cpu/o3/inst_queue.cc | 4 | ||||
-rw-r--r-- | cpu/o3/inst_queue.hh | 114 | ||||
-rw-r--r-- | cpu/o3/inst_queue_impl.hh | 480 |
6 files changed, 469 insertions, 553 deletions
diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh new file mode 100644 index 000000000..f8ae38da4 --- /dev/null +++ b/cpu/o3/dep_graph.hh @@ -0,0 +1,213 @@ + +#ifndef __CPU_O3_DEP_GRAPH_HH__ +#define __CPU_O3_DEP_GRAPH_HH__ + +#include "cpu/o3/comm.hh" + +template <class DynInstPtr> +class DependencyEntry +{ + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry<DynInstPtr> *next; +}; + +template <class DynInstPtr> +class DependencyGraph +{ + public: + typedef DependencyEntry<DynInstPtr> DepEntry; + + DependencyGraph() + : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) + { } + + void resize(int num_entries); + + void reset(); + + void insert(PhysRegIndex idx, DynInstPtr &new_inst); + + void setInst(PhysRegIndex idx, DynInstPtr &new_inst) + { dependGraph[idx].inst = new_inst; } + + void clearInst(PhysRegIndex idx) + { dependGraph[idx].inst = NULL; } + + void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + + DynInstPtr pop(PhysRegIndex idx); + + bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } + + /** Debugging function to dump out the dependency graph. + */ + void dump(); + + private: + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DepEntry *dependGraph; + + int numEntries; + + // Debug variable, remove when done testing. + unsigned memAllocCounter; + + public: + uint64_t nodesTraversed; + uint64_t nodesRemoved; +}; + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::resize(int num_entries) +{ + numEntries = num_entries; + dependGraph = new DepEntry[numEntries]; +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::reset() +{ + // Clear the dependency graph + DepEntry *curr; + DepEntry *prev; + + for (int i = 0; i < numEntries; ++i) { + curr = dependGraph[i].next; + + while (curr) { + memAllocCounter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DepEntry *new_entry = new DepEntry; + new_entry->next = dependGraph[idx].next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + dependGraph[idx].next = new_entry; + + ++memAllocCounter; +} + + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx, + DynInstPtr &inst_to_remove) +{ + DepEntry *prev = &dependGraph[idx]; + DepEntry *curr = dependGraph[idx].next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + nodesRemoved++; + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + nodesTraversed++; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --memAllocCounter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template <class DynInstPtr> +DynInstPtr +DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx) +{ + DepEntry *node; + node = dependGraph[idx].next; + DynInstPtr inst = NULL; + if (node) { + inst = node->inst; + dependGraph[idx].next = node->next; + node->inst = NULL; + memAllocCounter--; + delete node; + } + return inst; +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::dump() +{ + DepEntry *curr; + + for (int i = 0; i < numEntries; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } + cprintf("memAllocCounter: %i\n", memAllocCounter); +} + +#endif // __CPU_O3_DEP_GRAPH_HH__ diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index 72be25668..935320628 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,20 +41,23 @@ class FUPool; /** - * DefaultIEW handles both single threaded and SMT IEW(issue/execute/writeback). - * It handles the dispatching of instructions to the LSQ/IQ as part of the issue - * stage, and has the IQ try to issue instructions each cycle. The execute - * latency is actually tied into the issue latency to allow the IQ to be able to + * DefaultIEW handles both single threaded and SMT IEW + * (issue/execute/writeback). It handles the dispatching of + * instructions to the LSQ/IQ as part of the issue stage, and has the + * IQ try to issue instructions each cycle. The execute latency is + * actually tied into the issue latency to allow the IQ to be able to * do back-to-back scheduling without having to speculatively schedule - * instructions. This happens by having the IQ have access to the functional - * units, and the IQ gets the execution latencies from the FUs when it issues - * instructions. Instructions reach the execute stage on the last cycle of - * their execution, which is when the IQ knows to wake up any dependent - * instructions, allowing back to back scheduling. The execute portion of IEW - * separates memory instructions from non-memory instructions, either telling - * the LSQ to execute the instruction, or executing the instruction directly. - * The writeback portion of IEW completes the instructions by waking up any - * dependents, and marking the register ready on the scoreboard. + * instructions. This happens by having the IQ have access to the + * functional units, and the IQ gets the execution latencies from the + * FUs when it issues instructions. Instructions reach the execute + * stage on the last cycle of their execution, which is when the IQ + * knows to wake up any dependent instructions, allowing back to back + * scheduling. The execute portion of IEW separates memory + * instructions from non-memory instructions, either telling the LSQ + * to execute the instruction, or executing the instruction directly. + * The writeback portion of IEW completes the instructions by waking + * up any dependents, and marking the register ready on the + * scoreboard. */ template<class Impl> class DefaultIEW @@ -214,10 +217,8 @@ class DefaultIEW /** Tells CPU that the IEW stage is inactive and idle. */ inline void deactivateStage(); -//#if !FULL_SYSTEM /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } -//#endif private: /** Sends commit proper information for a squash due to a branch @@ -469,10 +470,10 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; - Stats::Vector<> exe_swp; - Stats::Vector<> exe_nop; - Stats::Vector<> exe_refs; - Stats::Vector<> exe_branches; + Stats::Vector<> exeSwp; + Stats::Vector<> exeNop; + Stats::Vector<> exeRefs; + Stats::Vector<> exeBranches; // Stats::Vector<> issued_ops; /* @@ -481,20 +482,20 @@ class DefaultIEW Stats::Vector<> dist_unissued; Stats::Vector2d<> stat_issued_inst_type; */ - Stats::Formula issue_rate; + Stats::Formula issueRate; Stats::Formula iewExecStoreInsts; // Stats::Formula issue_op_rate; // Stats::Formula fu_busy_rate; Stats::Vector<> iewInstsToCommit; - Stats::Vector<> writeback_count; - Stats::Vector<> producer_inst; - Stats::Vector<> consumer_inst; - Stats::Vector<> wb_penalized; - - Stats::Formula wb_rate; - Stats::Formula wb_fanout; - Stats::Formula wb_penalized_rate; + Stats::Vector<> writebackCount; + Stats::Vector<> producerInst; + Stats::Vector<> consumerInst; + Stats::Vector<> wbPenalized; + + Stats::Formula wbRate; + Stats::Formula wbFanout; + Stats::Formula wbPenalizedRate; }; #endif // __CPU_O3_IEW_HH__ diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index cbd7396f7..59f4055a6 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -69,7 +69,7 @@ DefaultIEW<Impl>::LdWritebackEvent::process() if (!inst->isExecuted()) { inst->setExecuted(); - // Execute again to copy data to proper place. + // Complete access to copy data to proper place. if (inst->isStore()) { inst->completeAcc(); } @@ -78,7 +78,6 @@ DefaultIEW<Impl>::LdWritebackEvent::process() // Need to insert instruction into queue to commit iewStage->instToCommit(inst); - //wroteToTimeBuffer = true; iewStage->activityThisCycle(); inst = NULL; @@ -93,8 +92,7 @@ DefaultIEW<Impl>::LdWritebackEvent::description() template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // Just make this time buffer really big for now - // @todo: Make this into a parameter. + : // @todo: Make this into a parameter. issueToExecQueue(5, 5), instQueue(params), ldstQueue(params), @@ -108,7 +106,6 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) numThreads(params->numberOfThreads), switchedOut(false) { - DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth); _status = Active; exeStatus = Running; wbStatus = Idle; @@ -130,7 +127,6 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) updateLSQNextCycle = false; - // @todo: Make into a parameter skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -149,8 +145,6 @@ DefaultIEW<Impl>::regStats() instQueue.regStats(); - //ldstQueue.regStats(); - iewIdleCycles .name(name() + ".iewIdleCycles") .desc("Number of cycles IEW is idle"); @@ -167,8 +161,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewUnblockCycles") .desc("Number of cycles IEW is unblocking"); -// iewWBInsts; - iewDispatchedInsts .name(name() + ".iewDispatchedInsts") .desc("Number of instructions dispatched to IQ"); @@ -206,11 +198,7 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewExecLoadInsts") .desc("Number of load instructions executed") .flags(total); -/* - iewExecStoreInsts - .name(name() + ".iewExecStoreInsts") - .desc("Number of store instructions executed"); -*/ + iewExecSquashedInsts .name(name() + ".iewExecSquashedInsts") .desc("Number of squashed instructions skipped in execute"); @@ -233,47 +221,47 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exe_swp + exeSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") .flags(total) ; - exe_nop + exeNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") .flags(total) ; - exe_refs + exeRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") .flags(total) ; - exe_branches + exeBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") .flags(total) ; - issue_rate + issueRate .name(name() + ".EXEC:rate") .desc("Inst execution rate") .flags(total) ; - issue_rate = iewExecutedInsts / cpu->numCycles; + issueRate = iewExecutedInsts / cpu->numCycles; iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") .flags(total) ; - iewExecStoreInsts = exe_refs - iewExecLoadInsts; + iewExecStoreInsts = exeRefs - iewExecLoadInsts; /* for (int i=0; i<Num_OpClasses; ++i) { stringstream subname; @@ -292,56 +280,56 @@ DefaultIEW<Impl>::regStats() .flags(total) ; - writeback_count + writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") .flags(total) ; - producer_inst + producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") .flags(total) ; - consumer_inst + consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") .flags(total) ; - wb_penalized + wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") .flags(total) ; - wb_penalized_rate + wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") .flags(total) ; - wb_penalized_rate = wb_penalized / writeback_count; + wbPenalizedRate = wbPenalized / writebackCount; - wb_fanout + wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") .flags(total) ; - wb_fanout = producer_inst / consumer_inst; + wbFanout = producerInst / consumerInst; - wb_rate + wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") .flags(total) ; - wb_rate = writeback_count / cpu->numCycles; + wbRate = writebackCount / cpu->numCycles; } template<class Impl> @@ -507,7 +495,7 @@ DefaultIEW<Impl>::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. - ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum,tid); + ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); updatedQueues = true; @@ -543,18 +531,15 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid) DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, PC: %#x " "[sn:%i].\n", tid, inst->readPC(), inst->seqNum); - // Tell rename to squash through the time buffer. toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->mispredPC[tid] = inst->readPC(); toCommit->nextPC[tid] = inst->readNextPC(); toCommit->branchMispredict[tid] = true; - // Prediction was incorrect, so send back inverse. toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); toCommit->includeSquashInst[tid] = false; - //toCommit->iewSquashNum[tid] = inst->seqNum; wroteToTimeBuffer = true; } @@ -566,13 +551,11 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid) DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, " "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum); - // Tell rename to squash through the time buffer. toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readNextPC(); toCommit->includeSquashInst[tid] = false; - //toCommit->iewSquashNum[tid] = inst->seqNum; wroteToTimeBuffer = true; } @@ -611,7 +594,6 @@ DefaultIEW<Impl>::block(unsigned tid) // reprocessed when this stage unblocks. skidInsert(tid); - // Set the status to Blocked. dispatchStatus[tid] = Blocked; } @@ -661,10 +643,7 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // to. If there are free write ports at the time, then go ahead // and write the instruction to that time. If there are not, // keep looking back to see where's the first time there's a - // free slot. What happens if you run out of free spaces? - // For now naively assume that all instructions take one cycle. - // Otherwise would have to look into the time buffer based on the - // latency of the instruction. + // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; if (wbNumInst == issueWidth) { @@ -918,10 +897,10 @@ void DefaultIEW<Impl>::sortInsts() { int insts_from_rename = fromRename->size; - +#ifdef DEBUG for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); - +#endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); } @@ -1047,9 +1026,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Be sure to mark these instructions as ready so that the // commit stage can go ahead and execute them, and mark // them as issued so the IQ doesn't reprocess them. - // ------------- - // @TODO: What happens if the ldstqueue is full? - // Do we process the other instructions? // Check for squashed instructions. if (inst->isSquashed()) { @@ -1125,6 +1101,9 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) ++iewDispStoreInsts; if (inst->isNonSpeculative()) { + // Non-speculative stores (namely store conditionals) + // need to be set as "canCommit()" so that commit can + // process them when they reach the head of commit. inst->setCanCommit(); instQueue.insertNonSpec(inst); add_to_iq = false; @@ -1137,6 +1116,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) toRename->iewInfo[tid].dispatchedToLSQ++; #if FULL_SYSTEM } else if (inst->isMemBarrier() || inst->isWriteBarrier()) { + // Same as non-speculative stores. inst->setCanCommit(); instQueue.insertBarrier(inst); add_to_iq = false; @@ -1145,7 +1125,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " "encountered, skipping.\n", tid); - // Same hack as with stores. + // Same as non-speculative stores. inst->setCanCommit(); // Specifically insert it as nonspeculative. @@ -1162,9 +1142,9 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) inst->setExecuted(); inst->setCanCommit(); - instQueue.advanceTail(inst); + instQueue.recordProducer(inst); - exe_nop[tid]++; + exeNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1175,7 +1155,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) inst->setIssued(); inst->setCanCommit(); - instQueue.advanceTail(inst); + instQueue.recordProducer(inst); add_to_iq = false; } else { @@ -1237,7 +1217,6 @@ template <class Impl> void DefaultIEW<Impl>::executeInsts() { - //bool fetch_redirect[(*activeThreads).size()]; wbNumInst = 0; wbCycle = 0; @@ -1254,20 +1233,17 @@ DefaultIEW<Impl>::executeInsts() // Execute/writeback any instructions that are available. int inst_num = 0; - for ( ; inst_num < issueWidth && /* Haven't exceeded issue bandwidth */ - fromIssue->insts[inst_num]; - ++inst_num) { + for ( ; inst_num < issueWidth && fromIssue->insts[inst_num]; + ++inst_num) { DPRINTF(IEW, "Execute: Executing instructions from IQ.\n"); - // Get instruction from issue's queue. DynInstPtr inst = fromIssue->insts[inst_num]; DPRINTF(IEW, "Execute: Processing PC %#x, [tid:%i] [sn:%i].\n", inst->readPC(), inst->threadNumber,inst->seqNum); // Check if the instruction is squashed; if so then skip it - // and don't count it towards the FU usage. if (inst->isSquashed()) { DPRINTF(IEW, "Execute: Instruction was squashed.\n"); @@ -1299,22 +1275,19 @@ DefaultIEW<Impl>::executeInsts() // Loads will mark themselves as executed, and their writeback // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); - -// ++iewExecLoadInsts; } else if (inst->isStore()) { ldstQueue.executeStore(inst); -// ++iewExecStoreInsts; - // If the store had a fault then it may not have a mem req if (inst->req && !(inst->req->flags & LOCKED)) { inst->setExecuted(); instToCommit(inst); } - // Store conditionals will mark themselves as executed, and - // their writeback event will add the instruction to the queue - // to commit. + + // Store conditionals will mark themselves as + // executed, and their writeback event will add the + // instruction to the queue to commit. } else { panic("Unexpected memory type!\n"); } @@ -1329,10 +1302,9 @@ DefaultIEW<Impl>::executeInsts() updateExeInstStats(inst); - // Check if branch was correct. This check happens after the - // instruction is added to the queue because even if the branch - // is mispredicted, the branch instruction itself is still valid. - // Only handle this if there hasn't already been something that + // Check if branch prediction was correct, if not then we need + // to tell commit to squash in flight instructions. Only + // handle this if there hasn't already been something that // redirects fetch in this group of instructions. // This probably needs to prioritize the redirects if a different @@ -1360,7 +1332,8 @@ DefaultIEW<Impl>::executeInsts() } else if (ldstQueue.violation(tid)) { fetchRedirect[tid] = true; - // Get the DynInst that caused the violation. Note that this + // If there was an ordering violation, then get the + // DynInst that caused the violation. Note that this // clears the violation signal. DynInstPtr violator; violator = ldstQueue.getMemDepViolator(tid); @@ -1409,13 +1382,11 @@ template <class Impl> void DefaultIEW<Impl>::writebackInsts() { - // Loop through the head of the time buffer and wake any dependents. - // These instructions are about to write back. In the simple model - // this loop can really happen within the previous loop, but when - // instructions have actual latencies, this loop must be separate. - // Also mark scoreboard that this instruction is finally complete. - // Either have IEW have direct access to rename map, or have this as - // part of backwards communication. + // Loop through the head of the time buffer and wake any + // dependents. These instructions are about to write back. Also + // mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to scoreboard, or have this + // as part of backwards communication. for (int inst_num = 0; inst_num < issueWidth && toCommit->insts[inst_num]; inst_num++) { DynInstPtr inst = toCommit->insts[inst_num]; @@ -1441,9 +1412,9 @@ DefaultIEW<Impl>::writebackInsts() scoreboard->setReg(inst->renamedDestRegIdx(i)); } - producer_inst[tid]++; - consumer_inst[tid]+= dependents; - writeback_count[tid]++; + producerInst[tid]++; + consumerInst[tid]+= dependents; + writebackCount[tid]++; } } } @@ -1452,8 +1423,6 @@ template<class Impl> void DefaultIEW<Impl>::tick() { - // Try to fill up issue queue with as many instructions as bandwidth - // allows. wbNumInst = 0; wbCycle = 0; @@ -1462,9 +1431,12 @@ DefaultIEW<Impl>::tick() sortInsts(); + // Free function units marked as being freed this cycle. + fuPool->processFreeUnits(); + list<unsigned>::iterator threads = (*activeThreads).begin(); - // Check stall and squash signals. + // Check stall and squash signals, dispatch any instructions. while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1472,7 +1444,6 @@ DefaultIEW<Impl>::tick() checkSignalsAndUpdate(tid); dispatch(tid); - } if (exeStatus != Squashing) { @@ -1502,9 +1473,6 @@ DefaultIEW<Impl>::tick() // Writeback any stores using any leftover bandwidth. ldstQueue.writebackStores(); - // Free function units marked as being freed this cycle. - fuPool->processFreeUnits(); - // Check the committed load/store signals to see if there's a load // or store to commit. Also check if it's being told to execute a // nonspeculative instruction. @@ -1557,8 +1525,6 @@ DefaultIEW<Impl>::tick() DPRINTF(IEW, "[tid:%i], Dispatch dispatched %i instructions.\n", tid, toRename->iewInfo[tid].dispatched); - - //thread_queue.pop(); } DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). " @@ -1585,7 +1551,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exe_swp[thread_number]++; + exeSwp[thread_number]++; else iewExecutedInsts++; #else @@ -1596,13 +1562,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exe_branches[thread_number]++; + exeBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exe_refs[thread_number]++; + exeRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/cpu/o3/inst_queue.cc b/cpu/o3/inst_queue.cc index 2ff2282b4..95ae2b699 100644 --- a/cpu/o3/inst_queue.cc +++ b/cpu/o3/inst_queue.cc @@ -32,7 +32,3 @@ // Force instantiation of InstructionQueue. template class InstructionQueue<AlphaSimpleImpl>; - -template<> -unsigned -InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0; diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 982294b4f..6bdf4ddc2 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dep_graph.hh" #include "encumbered/cpu/full/op_class.hh" #include "sim/host.hh" @@ -91,6 +92,8 @@ class InstructionQueue /** Pointer back to the instruction queue. */ InstructionQueue<Impl> *iqPtr; + bool freeFU; + public: /** Construct a FU completion event. */ FUCompletion(DynInstPtr &_inst, int fu_idx, @@ -98,6 +101,7 @@ class InstructionQueue virtual void process(); virtual const char *description(); + void setFreeFU() { freeFU = true; } }; /** Constructs an IQ. */ @@ -114,8 +118,6 @@ class InstructionQueue void resetState(); - void resetDependencyGraph(); - /** Sets CPU pointer. */ void setCPU(FullCPU *_cpu) { cpu = _cpu; } @@ -170,11 +172,11 @@ class InstructionQueue void insertBarrier(DynInstPtr &barr_inst); /** - * Advances the tail of the IQ, used if an instruction is not added to the - * IQ for scheduling. - * @todo: Rename this function. + * Records the instruction as the producer of a register without + * adding it to the rest of the IQ. */ - void advanceTail(DynInstPtr &inst); + void recordProducer(DynInstPtr &inst) + { addToProducers(inst); } /** Process FU completion event. */ void processFUCompletion(DynInstPtr &inst, int fu_idx); @@ -224,9 +226,6 @@ class InstructionQueue /** Returns the number of used entries for a thread. */ unsigned getCount(unsigned tid) { return count[tid]; }; - /** Updates the number of free entries. */ - void updateFreeEntries(int num) { freeEntries += num; } - /** Debug function to print all instructions. */ void printInsts(); @@ -286,15 +285,6 @@ class InstructionQueue } }; - /** - * Struct for an IQ entry. It includes the instruction and an iterator - * to the instruction's spot in the IQ. - */ - struct IQEntry { - DynInstPtr inst; - ListIt iqIt; - }; - typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue; @@ -309,7 +299,6 @@ class InstructionQueue * inside of DynInst), when these instructions are woken up only * the sequence number will be available. Thus it is most efficient to be * able to search by the sequence number alone. - * @todo: Maybe change this to a priority queue per thread. */ std::map<InstSeqNum, DynInstPtr> nonSpecInsts; @@ -324,6 +313,9 @@ class InstructionQueue /** List that contains the age order of the oldest instruction of each * ready queue. Used to select the oldest instruction available * among op classes. + * @todo: Might be better to just move these entries around instead + * of creating new ones every time the position changes due to an + * instruction issuing. Not sure std::list supports this. */ std::list<ListOrderEntry> listOrder; @@ -346,6 +338,8 @@ class InstructionQueue */ void moveToYoungerInst(ListOrderIt age_order_it); + DependencyGraph<DynInstPtr> dependGraph; + ////////////////////////////////////// // Various parameters ////////////////////////////////////// @@ -397,57 +391,9 @@ class InstructionQueue bool switchedOut; - ////////////////////////////////// - // Variables needed for squashing - ////////////////////////////////// - /** The sequence number of the squashed instruction. */ InstSeqNum squashedSeqNum[Impl::MaxThreads]; - /** Iterator that points to the last instruction that has been squashed. - * This will not be valid unless the IQ is in the process of squashing. - */ - ListIt squashIt[Impl::MaxThreads]; - - /////////////////////////////////// - // Dependency graph stuff - /////////////////////////////////// - - class DependencyEntry - { - public: - DependencyEntry() - : inst(NULL), next(NULL) - { } - - DynInstPtr inst; - //Might want to include data about what arch. register the - //dependence is waiting on. - DependencyEntry *next; - - //This function, and perhaps this whole class, stand out a little - //bit as they don't fit a classification well. I want access - //to the underlying structure of the linked list, yet at - //the same time it feels like this should be something abstracted - //away. So for now it will sit here, within the IQ, until - //a better implementation is decided upon. - // This function probably shouldn't be within the entry... - void insert(DynInstPtr &new_inst); - - void remove(DynInstPtr &inst_to_remove); - - // Debug variable, remove when done testing. - static unsigned mem_alloc_counter; - }; - - /** Array of linked lists. Each linked list is a list of all the - * instructions that depend upon a given register. The actual - * register's index is used to index into the graph; ie all - * instructions in flight that are dependent upon r34 will be - * in the linked list of dependGraph[34]. - */ - DependencyEntry *dependGraph; - /** A cache of the recently woken registers. It is 1 if the register * has been woken up recently, and 0 if the register has been added * to the dependency graph and has not yet received its value. It @@ -456,11 +402,11 @@ class InstructionQueue */ std::vector<bool> regScoreboard; - /** Adds an instruction to the dependency graph, as a producer. */ + /** Adds an instruction to the dependency graph, as a consumer. */ bool addToDependents(DynInstPtr &new_inst); - /** Adds an instruction to the dependency graph, as a consumer. */ - void createDependency(DynInstPtr &new_inst); + /** Adds an instruction to the dependency graph, as a producer. */ + void addToProducers(DynInstPtr &new_inst); /** Moves an instruction to the ready queue if it is ready. */ void addIfReady(DynInstPtr &inst); @@ -471,10 +417,6 @@ class InstructionQueue */ int countInsts(); - /** Debugging function to dump out the dependency graph. - */ - void dumpDependGraph(); - /** Debugging function to dump all the list sizes, as well as print * out the list of nonspeculative instructions. Should not be used * in any other capacity, but it has no harmful sideaffects. @@ -490,20 +432,16 @@ class InstructionQueue Stats::Scalar<> iqInstsAdded; /** Stat for number of non-speculative instructions added. */ Stats::Scalar<> iqNonSpecInstsAdded; -// Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqInstsIssued; /** Stat for number of integer instructions issued. */ Stats::Scalar<> iqIntInstsIssued; -// Stats::Scalar<> iqFloatInstsAdded; /** Stat for number of floating point instructions issued. */ Stats::Scalar<> iqFloatInstsIssued; -// Stats::Scalar<> iqBranchInstsAdded; /** Stat for number of branch instructions issued. */ Stats::Scalar<> iqBranchInstsIssued; -// Stats::Scalar<> iqMemInstsAdded; /** Stat for number of memory instructions issued. */ Stats::Scalar<> iqMemInstsIssued; -// Stats::Scalar<> iqMiscInstsAdded; /** Stat for number of miscellaneous instructions issued. */ Stats::Scalar<> iqMiscInstsIssued; /** Stat for number of squashed instructions that were ready to issue. */ @@ -518,20 +456,20 @@ class InstructionQueue */ Stats::Scalar<> iqSquashedNonSpecRemoved; - Stats::VectorDistribution<> queue_res_dist; - Stats::Distribution<> n_issued_dist; - Stats::VectorDistribution<> issue_delay_dist; + Stats::VectorDistribution<> queueResDist; + Stats::Distribution<> numIssuedDist; + Stats::VectorDistribution<> issueDelayDist; - Stats::Vector<> stat_fu_busy; + Stats::Vector<> statFuBusy; // Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; + Stats::Vector2d<> statIssuedInstType; - Stats::Formula issue_rate; + Stats::Formula issueRate; // Stats::Formula issue_stores; // Stats::Formula issue_op_rate; - Stats::Vector<> fu_busy; //cumulative fu busy + Stats::Vector<> fuBusy; //cumulative fu busy - Stats::Formula fu_busy_rate; + Stats::Formula fuBusyRate; }; #endif //__CPU_O3_INST_QUEUE_HH__ diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index 0d9cc09f3..ed57ac257 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,14 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// Todo: -// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake -// it; either do in reverse order, or have added instructions put into a -// different ready queue that, in scheduleRreadyInsts(), gets put onto the -// normal ready queue. This would however give only a one cycle delay, -// but probably is more flexible to actually add in a delay parameter than -// just running it backwards. - #include <limits> #include <vector> @@ -49,7 +41,7 @@ InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst, int fu_idx, InstructionQueue<Impl> *iq_ptr) : Event(&mainEventQueue, Stat_Event_Pri), - inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr) + inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false) { this->setFlags(Event::AutoDelete); } @@ -58,7 +50,7 @@ template <class Impl> void InstructionQueue<Impl>::FUCompletion::process() { - iqPtr->processFUCompletion(inst, fuIdx); + iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1); inst = NULL; } @@ -93,14 +85,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) //Create an entry for each physical register within the //dependency graph. - dependGraph = new DependencyEntry[numPhysRegs]; - - // Initialize all the head pointers to point to NULL, and all the - // entries as unready. - for (int i = 0; i < numPhysRegs; ++i) { - dependGraph[i].next = NULL; - dependGraph[i].inst = NULL; - } + dependGraph.resize(numPhysRegs); // Resize the register scoreboard. regScoreboard.resize(numPhysRegs); @@ -165,10 +150,9 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) template <class Impl> InstructionQueue<Impl>::~InstructionQueue() { - resetDependencyGraph(); - assert(DependencyEntry::mem_alloc_counter == 0); - - delete [] dependGraph; + dependGraph.reset(); + cprintf("Nodes traversed: %i, removed: %i\n", + dependGraph.nodesTraversed, dependGraph.nodesRemoved); } template <class Impl> @@ -193,8 +177,6 @@ InstructionQueue<Impl>::regStats() .desc("Number of non-speculative instructions added to the IQ") .prereq(iqNonSpecInstsAdded); -// iqIntInstsAdded; - iqInstsIssued .name(name() + ".iqInstsIssued") .desc("Number of instructions issued") @@ -205,29 +187,21 @@ InstructionQueue<Impl>::regStats() .desc("Number of integer instructions issued") .prereq(iqIntInstsIssued); -// iqFloatInstsAdded; - iqFloatInstsIssued .name(name() + ".iqFloatInstsIssued") .desc("Number of float instructions issued") .prereq(iqFloatInstsIssued); -// iqBranchInstsAdded; - iqBranchInstsIssued .name(name() + ".iqBranchInstsIssued") .desc("Number of branch instructions issued") .prereq(iqBranchInstsIssued); -// iqMemInstsAdded; - iqMemInstsIssued .name(name() + ".iqMemInstsIssued") .desc("Number of memory instructions issued") .prereq(iqMemInstsIssued); -// iqMiscInstsAdded; - iqMiscInstsIssued .name(name() + ".iqMiscInstsIssued") .desc("Number of miscellaneous instructions issued") @@ -255,16 +229,16 @@ InstructionQueue<Impl>::regStats() .desc("Number of squashed non-spec instructions that were removed") .prereq(iqSquashedNonSpecRemoved); - queue_res_dist + queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") .desc("cycles from dispatch to issue") .flags(total | pdf | cdf ) ; for (int i = 0; i < Num_OpClasses; ++i) { - queue_res_dist.subname(i, opClassStrings[i]); + queueResDist.subname(i, opClassStrings[i]); } - n_issued_dist + numIssuedDist .init(0,totalWidth,1) .name(name() + ".ISSUE:issued_per_cycle") .desc("Number of insts issued each cycle") @@ -281,19 +255,19 @@ InstructionQueue<Impl>::regStats() dist_unissued.subname(i, unissued_names[i]); } */ - stat_issued_inst_type + statIssuedInstType .init(numThreads,Num_OpClasses) .name(name() + ".ISSUE:FU_type") .desc("Type of FU issued") .flags(total | pdf | dist) ; - stat_issued_inst_type.ysubnames(opClassStrings); + statIssuedInstType.ysubnames(opClassStrings); // // How long did instructions for a particular FU type wait prior to issue // - issue_delay_dist + issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") .desc("cycles from operands ready to issue") @@ -303,15 +277,15 @@ InstructionQueue<Impl>::regStats() for (int i=0; i<Num_OpClasses; ++i) { stringstream subname; subname << opClassStrings[i] << "_delay"; - issue_delay_dist.subname(i, subname.str()); + issueDelayDist.subname(i, subname.str()); } - issue_rate + issueRate .name(name() + ".ISSUE:rate") .desc("Inst issue rate") .flags(total) ; - issue_rate = iqInstsIssued / cpu->numCycles; + issueRate = iqInstsIssued / cpu->numCycles; /* issue_stores .name(name() + ".ISSUE:stores") @@ -328,29 +302,29 @@ InstructionQueue<Impl>::regStats() ; issue_op_rate = issued_ops / numCycles; */ - stat_fu_busy + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") .desc("attempts to use FU when none available") .flags(pdf | dist) ; for (int i=0; i < Num_OpClasses; ++i) { - stat_fu_busy.subname(i, opClassStrings[i]); + statFuBusy.subname(i, opClassStrings[i]); } - fu_busy + fuBusy .init(numThreads) .name(name() + ".ISSUE:fu_busy_cnt") .desc("FU busy when requested") .flags(total) ; - fu_busy_rate + fuBusyRate .name(name() + ".ISSUE:fu_busy_rate") .desc("FU busy rate (busy events/executed inst)") .flags(total) ; - fu_busy_rate = fu_busy / iqInstsIssued; + fuBusyRate = fuBusy / iqInstsIssued; for ( int i=0; i < numThreads; i++) { // Tell mem dependence unit to reg stats as well. @@ -396,35 +370,6 @@ InstructionQueue<Impl>::resetState() template <class Impl> void -InstructionQueue<Impl>::resetDependencyGraph() -{ - // Clear the dependency graph - DependencyEntry *curr; - DependencyEntry *prev; - - for (int i = 0; i < numPhysRegs; ++i) { - curr = dependGraph[i].next; - - while (curr) { - DependencyEntry::mem_alloc_counter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; - - delete prev; - } - - if (dependGraph[i].inst) { - dependGraph[i].inst = NULL; - } - - dependGraph[i].next = NULL; - } -} - -template <class Impl> -void InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr) { DPRINTF(IQ, "Setting active threads list pointer.\n"); @@ -454,7 +399,7 @@ void InstructionQueue<Impl>::switchOut() { resetState(); - resetDependencyGraph(); + dependGraph.reset(); switchedOut = true; for (int i = 0; i < numThreads; ++i) { memDepUnit[i].switchOut(); @@ -562,20 +507,15 @@ InstructionQueue<Impl>::insert(DynInstPtr &new_inst) // Make sure the instruction is valid assert(new_inst); - DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); + DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n", + new_inst->seqNum, new_inst->readPC()); - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. assert(freeEntries != 0); instList[new_inst->threadNumber].push_back(new_inst); - // Decrease the number of free entries. --freeEntries; - //Mark Instruction as in IQ new_inst->setInIQ(); // Look through its source registers (physical regs), and mark any @@ -584,21 +524,16 @@ InstructionQueue<Impl>::insert(DynInstPtr &new_inst) // Have this instruction set itself as the producer of its destination // register(s). - createDependency(new_inst); + addToProducers(new_inst); - // If it's a memory instruction, add it to the memory dependency - // unit. if (new_inst->isMemRef()) { memDepUnit[new_inst->threadNumber].insert(new_inst); } else { - // If the instruction is ready then add it to the ready list. addIfReady(new_inst); } ++iqInstsAdded; - - //Update Thread IQ Count count[new_inst->threadNumber]++; assert(freeEntries == (numEntries - countInsts())); @@ -611,30 +546,25 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst) // @todo: Clean up this code; can do it by setting inst as unable // to issue, then calling normal insert on the inst. - // Make sure the instruction is valid assert(new_inst); nonSpecInsts[new_inst->seqNum] = new_inst; - DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); + DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x " + "to the IQ.\n", + new_inst->seqNum, new_inst->readPC()); - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. assert(freeEntries != 0); instList[new_inst->threadNumber].push_back(new_inst); - // Decrease the number of free entries. --freeEntries; - //Mark Instruction as in IQ new_inst->setInIQ(); // Have this instruction set itself as the producer of its destination // register(s). - createDependency(new_inst); + addToProducers(new_inst); // If it's a memory instruction, add it to the memory dependency // unit. @@ -644,7 +574,6 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst) ++iqNonSpecInstsAdded; - //Update Thread IQ Count count[new_inst->threadNumber]++; assert(freeEntries == (numEntries - countInsts())); @@ -661,15 +590,6 @@ InstructionQueue<Impl>::insertBarrier(DynInstPtr &barr_inst) template <class Impl> void -InstructionQueue<Impl>::advanceTail(DynInstPtr &inst) -{ - // Have this instruction set itself as the producer of its destination - // register(s). - createDependency(inst); -} - -template <class Impl> -void InstructionQueue<Impl>::addToOrderList(OpClass op_class) { assert(!readyInsts[op_class].empty()); @@ -733,8 +653,15 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx) iewStage->wakeCPU(); - fuPool->freeUnit(fu_idx); + if (fu_idx > -1) + fuPool->freeUnitNextCycle(fu_idx); + // @todo: Ensure that these FU Completions happen at the beginning + // of a cycle, otherwise they could add too many instructions to + // the queue. + // @todo: This could break if there's multiple multi-cycle ops + // finishing on this cycle. Maybe implement something like + // instToCommit in iew_impl.hh. int &size = issueToExecuteQueue->access(0)->size; issueToExecuteQueue->access(0)->insts[size++] = inst; @@ -752,20 +679,6 @@ InstructionQueue<Impl>::scheduleReadyInsts() IssueStruct *i2e_info = issueToExecuteQueue->access(0); - // Will need to reorder the list if either a queue is not on the list, - // or it has an older instruction than last time. - for (int i = 0; i < Num_OpClasses; ++i) { - if (!readyInsts[i].empty()) { - if (!queueOnList[i]) { - addToOrderList(OpClass(i)); - } else if (readyInsts[i].top()->seqNum < - (*readyIt[i]).oldestInst) { - listOrder.erase(readyIt[i]); - addToOrderList(OpClass(i)); - } - } - } - // Have iterator to head of the list // While I haven't exceeded bandwidth or reached the end of the list, // Try to get a FU that can do what this op needs. @@ -779,7 +692,8 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; int exec_queue_slot = i2e_info->size; - while (exec_queue_slot < totalWidth && order_it != order_end_it) { + while (exec_queue_slot < totalWidth && total_issued < totalWidth && + order_it != order_end_it) { OpClass op_class = (*order_it).queueType; assert(!readyInsts[op_class].empty()); @@ -805,70 +719,47 @@ InstructionQueue<Impl>::scheduleReadyInsts() continue; } - int idx = fuPool->getUnit(op_class); - + int idx = -2; + int op_latency = 1; int tid = issuing_inst->threadNumber; - if (idx == -2) { - assert(op_class == No_OpClass); - - i2e_info->insts[exec_queue_slot++] = issuing_inst; - i2e_info->size++; - - DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU" - " %#x [sn:%lli]\n", - tid, issuing_inst->readPC(), - issuing_inst->seqNum); - - readyInsts[op_class].pop(); - - if (!readyInsts[op_class].empty()) { - moveToYoungerInst(order_it); - } else { - readyIt[op_class] = listOrder.end(); - queueOnList[op_class] = false; - } - - issuing_inst->setIssued(); - ++total_issued; + if (op_class != No_OpClass) { + idx = fuPool->getUnit(op_class); - if (!issuing_inst->isMemRef()) { - // Memory instructions can not be freed from the IQ until they - // complete. - ++freeEntries; - count[tid]--; - issuing_inst->removeInIQ(); - } else { - memDepUnit[tid].issue(issuing_inst); + if (idx > -1) { + op_latency = fuPool->getOpLatency(op_class); } + } - listOrder.erase(order_it++); - - stat_issued_inst_type[tid][op_class]++; - } else if (idx != -1) { - int op_latency = fuPool->getOpLatency(op_class); - + if (idx == -2 || idx != -1) { if (op_latency == 1) { i2e_info->insts[exec_queue_slot++] = issuing_inst; i2e_info->size++; - // Add the FU onto the list of FU's to be freed next cycle. - fuPool->freeUnit(idx); + // Add the FU onto the list of FU's to be freed next + // cycle if we used one. + if (idx >= 0) + fuPool->freeUnitNextCycle(idx); } else { int issue_latency = fuPool->getIssueLatency(op_class); + // Generate completion event for the FU + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); - if (issue_latency > 1) { - // Generate completion event for the FU - FUCompletion *execution = new FUCompletion(issuing_inst, - idx, this); + execution->schedule(curTick + cpu->cycles(issue_latency - 1)); - execution->schedule(curTick + cpu->cycles(issue_latency - 1)); + // @todo: Enforce that issue_latency == 1 or op_latency + if (issue_latency > 1) { + execution->setFreeFU(); } else { - i2e_info->insts[exec_queue_slot++] = issuing_inst; - i2e_info->size++; + // @todo: Not sure I'm accounting for the + // multi-cycle op in a pipelined FU properly, or + // the number of instructions issued in one cycle. +// i2e_info->insts[exec_queue_slot++] = issuing_inst; +// i2e_info->size++; // Add the FU onto the list of FU's to be freed next cycle. - fuPool->freeUnit(idx); + fuPool->freeUnitNextCycle(idx); } } @@ -900,15 +791,16 @@ InstructionQueue<Impl>::scheduleReadyInsts() } listOrder.erase(order_it++); - stat_issued_inst_type[tid][op_class]++; + statIssuedInstType[tid][op_class]++; } else { - stat_fu_busy[op_class]++; - fu_busy[tid]++; + statFuBusy[op_class]++; + fuBusy[tid]++; ++order_it; } } - n_issued_dist.sample(total_issued); + numIssuedDist.sample(total_issued); + iqInstsIssued+= total_issued; if (total_issued) { cpu->activityThisCycle(); @@ -930,10 +822,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) unsigned tid = (*inst_it).second->threadNumber; - // Mark this instruction as ready to issue. (*inst_it).second->setCanIssue(); - // Now schedule the instruction. if (!(*inst_it).second->isMemRef()) { addIfReady((*inst_it).second); } else { @@ -949,7 +839,6 @@ template <class Impl> void InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid) { - /*Need to go through each thread??*/ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", tid,inst); @@ -973,18 +862,13 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) DPRINTF(IQ, "Waking dependents of completed instruction.\n"); assert(!completed_inst->isSquashed()); - // Look at the physical destination register of the DynInst - // and look it up on the dependency graph. Then mark as ready - // any instructions within the instruction queue. - DependencyEntry *curr; - DependencyEntry *prev; // Tell the memory dependence unit to wake any dependents on this // instruction if it is a memory instruction. Also complete the memory - // instruction at this point since we know it executed fine. - // @todo: Might want to rename "completeMemInst" to - // something that indicates that it won't need to be replayed, and call - // this earlier. Might not be a big deal. + // instruction at this point since we know it executed without issues. + // @todo: Might want to rename "completeMemInst" to something that + // indicates that it won't need to be replayed, and call this + // earlier. Might not be a big deal. if (completed_inst->isMemRef()) { memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); completeMemInst(completed_inst); @@ -1010,39 +894,31 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) DPRINTF(IQ, "Waking any dependents on register %i.\n", (int) dest_reg); - //Maybe abstract this part into a function. - //Go through the dependency chain, marking the registers as ready - //within the waiting instructions. - - curr = dependGraph[dest_reg].next; + //Go through the dependency chain, marking the registers as + //ready within the waiting instructions. + DynInstPtr dep_inst = dependGraph.pop(dest_reg); - while (curr) { + while (dep_inst) { DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", - curr->inst->readPC()); + dep_inst->readPC()); // Might want to give more information to the instruction - // so that it knows which of its source registers is ready. - // However that would mean that the dependency graph entries - // would need to hold the src_reg_idx. - curr->inst->markSrcRegReady(); + // so that it knows which of its source registers is + // ready. However that would mean that the dependency + // graph entries would need to hold the src_reg_idx. + dep_inst->markSrcRegReady(); - addIfReady(curr->inst); + addIfReady(dep_inst); - DependencyEntry::mem_alloc_counter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; + dep_inst = dependGraph.pop(dest_reg); ++dependents; - - delete prev; } - // Reset the head node now that all of its dependents have been woken - // up. - dependGraph[dest_reg].next = NULL; - dependGraph[dest_reg].inst = NULL; + // Reset the head node now that all of its dependents have + // been woken up. + assert(dependGraph.empty(dest_reg)); + dependGraph.clearInst(dest_reg); // Mark the scoreboard as having that register ready. regScoreboard[dest_reg] = true; @@ -1058,6 +934,16 @@ InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst) readyInsts[op_class].push(ready_inst); + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + if (!queueOnList[op_class]) { + addToOrderList(op_class); + } else if (readyInsts[op_class].top()->seqNum < + (*readyIt[op_class]).oldestInst) { + listOrder.erase(readyIt[op_class]); + addToOrderList(op_class); + } + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " "the ready list, PC %#x opclass:%i [sn:%lli].\n", ready_inst->readPC(), op_class, ready_inst->seqNum); @@ -1114,10 +1000,6 @@ InstructionQueue<Impl>::squash(unsigned tid) // time buffer. squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; - // Setup the squash iterator to point to the tail. - squashIt[tid] = instList[tid].end(); - --squashIt[tid]; - // Call doSquash if there are insts in the IQ if (count[tid] > 0) { doSquash(tid); @@ -1131,24 +1013,25 @@ template <class Impl> void InstructionQueue<Impl>::doSquash(unsigned tid) { - // Make sure the squashed sequence number is valid. -// assert(squashedSeqNum[tid] != 0); + // Start at the tail. + ListIt squash_it = instList[tid].end(); + --squash_it; DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", tid, squashedSeqNum[tid]); // Squash any instructions younger than the squashed sequence number // given. - while (squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum[tid]) { + while (squash_it != instList[tid].end() && + (*squash_it)->seqNum > squashedSeqNum[tid]) { - DynInstPtr squashed_inst = (*squashIt[tid]); + DynInstPtr squashed_inst = (*squash_it); // Only handle the instruction if it actually is in the IQ and // hasn't already been squashed in the IQ. if (squashed_inst->threadNumber != tid || squashed_inst->isSquashedInIQ()) { - --squashIt[tid]; + --squash_it; continue; } @@ -1168,27 +1051,23 @@ InstructionQueue<Impl>::doSquash(unsigned tid) PhysRegIndex src_reg = squashed_inst->renamedSrcRegIdx(src_reg_idx); - // Only remove it from the dependency graph if it was - // placed there in the first place. - // HACK: This assumes that instructions woken up from the - // dependency chain aren't informed that a specific src - // register has become ready. This may not always be true - // in the future. - // Instead of doing a linked list traversal, we can just - // remove these squashed instructions either at issue time, - // or when the register is overwritten. The only downside - // to this is it leaves more room for error. + // Only remove it from the dependency graph if it + // was placed there in the first place. + + // Instead of doing a linked list traversal, we + // can just remove these squashed instructions + // either at issue time, or when the register is + // overwritten. The only downside to this is it + // leaves more room for error. if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && src_reg < numPhysRegs) { - dependGraph[src_reg].remove(squashed_inst); + dependGraph.remove(src_reg, squashed_inst); } ++iqSquashedOperandsExamined; } - - // Might want to remove producers as well. } else { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); @@ -1217,75 +1096,17 @@ InstructionQueue<Impl>::doSquash(unsigned tid) ++freeEntries; - if (numThreads > 1) { - DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " - "squashed.\n", - tid, squashed_inst->seqNum, squashed_inst->readPC()); - } else { - DPRINTF(IQ, "Instruction [sn:%lli] PC %#x squashed.\n", - squashed_inst->seqNum, squashed_inst->readPC()); - } + DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " + "squashed.\n", + tid, squashed_inst->seqNum, squashed_inst->readPC()); } - instList[tid].erase(squashIt[tid]--); + instList[tid].erase(squash_it--); ++iqSquashedInstsExamined; } } template <class Impl> -void -InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst) -{ - //Add this new, dependent instruction at the head of the dependency - //chain. - - // First create the entry that will be added to the head of the - // dependency chain. - DependencyEntry *new_entry = new DependencyEntry; - new_entry->next = this->next; - new_entry->inst = new_inst; - - // Then actually add it to the chain. - this->next = new_entry; - - ++mem_alloc_counter; -} - -template <class Impl> -void -InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove) -{ - DependencyEntry *prev = this; - DependencyEntry *curr = this->next; - - // Make sure curr isn't NULL. Because this instruction is being - // removed from a dependency list, it must have been placed there at - // an earlier time. The dependency chain should not be empty, - // unless the instruction dependent upon it is already ready. - if (curr == NULL) { - return; - } - - // Find the instruction to remove within the dependency linked list. - while (curr->inst != inst_to_remove) { - prev = curr; - curr = curr->next; - - assert(curr != NULL); - } - - // Now remove this instruction from the list. - prev->next = curr->next; - - --mem_alloc_counter; - - // Could push this off to the destructor of DependencyEntry - curr->inst = NULL; - - delete curr; -} - -template <class Impl> bool InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) { @@ -1313,7 +1134,7 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) "is being added to the dependency chain.\n", new_inst->readPC(), src_reg); - dependGraph[src_reg].insert(new_inst); + dependGraph.insert(src_reg, new_inst); // Change the return value to indicate that something // was added to the dependency graph. @@ -1323,7 +1144,7 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) "became ready before it reached the IQ.\n", new_inst->readPC(), src_reg); // Mark a register ready within the instruction. - new_inst->markSrcRegReady(); + new_inst->markSrcRegReady(src_reg_idx); } } } @@ -1333,12 +1154,12 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) template <class Impl> void -InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) +InstructionQueue<Impl>::addToProducers(DynInstPtr &new_inst) { - //Actually nothing really needs to be marked when an - //instruction becomes the producer of a register's value, - //but for convenience a ptr to the producing instruction will - //be placed in the head node of the dependency links. + // Nothing really needs to be marked when an instruction becomes + // the producer of a register's value, but for convenience a ptr + // to the producing instruction will be placed in the head node of + // the dependency links. int8_t total_dest_regs = new_inst->numDestRegs(); for (int dest_reg_idx = 0; @@ -1355,12 +1176,12 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) continue; } - if (dependGraph[dest_reg].next) { - dumpDependGraph(); + if (!dependGraph.empty(dest_reg)) { + dependGraph.dump(); panic("Dependency graph %i not empty!", dest_reg); } - dependGraph[dest_reg].inst = new_inst; + dependGraph.setInst(dest_reg, new_inst); // Mark the scoreboard to say it's not yet ready. regScoreboard[dest_reg] = false; @@ -1371,7 +1192,7 @@ template <class Impl> void InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) { - //If the instruction now has all of its source registers + // If the instruction now has all of its source registers // available, then add it to the list of ready instructions. if (inst->readyToIssue()) { @@ -1382,7 +1203,6 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) // Message to the mem dependence unit that this instruction has // its registers ready. - memDepUnit[inst->threadNumber].regsReady(inst); return; @@ -1395,6 +1215,16 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) inst->readPC(), op_class, inst->seqNum); readyInsts[op_class].push(inst); + + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + if (!queueOnList[op_class]) { + addToOrderList(op_class); + } else if (readyInsts[op_class].top()->seqNum < + (*readyIt[op_class]).oldestInst) { + listOrder.erase(readyIt[op_class]); + addToOrderList(op_class); + } } } @@ -1436,34 +1266,6 @@ InstructionQueue<Impl>::countInsts() template <class Impl> void -InstructionQueue<Impl>::dumpDependGraph() -{ - DependencyEntry *curr; - - for (int i = 0; i < numPhysRegs; ++i) - { - curr = &dependGraph[i]; - - if (curr->inst) { - cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", - i, curr->inst->readPC(), curr->inst->seqNum); - } else { - cprintf("dependGraph[%i]: No producer. consumer: ", i); - } - - while (curr->next != NULL) { - curr = curr->next; - - cprintf("%#x [sn:%lli] ", - curr->inst->readPC(), curr->inst->seqNum); - } - - cprintf("\n"); - } -} - -template <class Impl> -void InstructionQueue<Impl>::dumpLists() { for (int i = 0; i < Num_OpClasses; ++i) { @@ -1524,8 +1326,8 @@ InstructionQueue<Impl>::dumpInsts() cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. + // Loads that have not been marked as executed + // still count towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } |