diff options
author | Kevin Lim <ktlim@umich.edu> | 2006-05-19 15:53:17 -0400 |
---|---|---|
committer | Kevin Lim <ktlim@umich.edu> | 2006-05-19 15:53:17 -0400 |
commit | e3d5588ca70c88318c1e41e438102034c92c561e (patch) | |
tree | 75886333b22b474893e57254fa00df7a4055df68 | |
parent | 1a6f21b8d23494752cdc9d3a8d1c1a2adfd85ccf (diff) | |
download | gem5-e3d5588ca70c88318c1e41e438102034c92c561e.tar.xz |
O3 code update/cleanup.
cpu/o3/commit_impl.hh:
O3 code update/cleanup. Fetch fault code no longer needed (see previous checkin).
--HG--
extra : convert_revision : f602e7f978e19b8900dce482f38f9c7a195e94da
-rw-r--r-- | cpu/o3/2bit_local_pred.cc | 2 |
-rw-r--r-- | cpu/o3/2bit_local_pred.hh | 2 |
-rw-r--r-- | cpu/o3/alpha_cpu.hh | 18 |
-rw-r--r-- | cpu/o3/bpred_unit.cc | 2 |
-rw-r--r-- | cpu/o3/bpred_unit.hh | 7 |
-rw-r--r-- | cpu/o3/bpred_unit_impl.hh | 6 |
-rw-r--r-- | cpu/o3/comm.hh | 6 |
-rw-r--r-- | cpu/o3/commit.hh | 5 |
-rw-r--r-- | cpu/o3/commit_impl.hh | 59 |
-rw-r--r-- | cpu/o3/decode.hh | 12 |
-rw-r--r-- | cpu/o3/decode_impl.hh | 14 |
-rw-r--r-- | cpu/o3/fetch.hh | 31 |
-rw-r--r-- | cpu/o3/fetch_impl.hh | 29 |
-rw-r--r-- | cpu/o3/lsq.hh | 65 |
-rw-r--r-- | cpu/o3/lsq_impl.hh | 138 |
-rw-r--r-- | cpu/o3/lsq_unit.hh | 218 |
-rw-r--r-- | cpu/o3/lsq_unit_impl.hh | 317 |
-rw-r--r-- | cpu/o3/mem_dep_unit.hh | 9 |
-rw-r--r-- | cpu/o3/mem_dep_unit_impl.hh | 20 |
-rw-r--r-- | cpu/o3/rename.hh | 32 |
-rw-r--r-- | cpu/o3/rename_impl.hh | 35 |
-rw-r--r-- | cpu/o3/rename_map.cc | 81 |
-rw-r--r-- | cpu/o3/rename_map.hh | 5 |
-rw-r--r-- | cpu/o3/rob.hh | 34 |
-rw-r--r-- | cpu/o3/rob_impl.hh | 38 |
-rw-r--r-- | cpu/o3/scoreboard.cc | 1 |
-rw-r--r-- | cpu/o3/store_set.cc | 7 |
-rw-r--r-- | cpu/o3/thread_state.hh | 95 |
28 files changed, 381 insertions, 907 deletions
diff --git a/cpu/o3/2bit_local_pred.cc b/cpu/o3/2bit_local_pred.cc index eab98531d..c3fb2fdb8 100644 --- a/cpu/o3/2bit_local_pred.cc +++ b/cpu/o3/2bit_local_pred.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/cpu/o3/2bit_local_pred.hh b/cpu/o3/2bit_local_pred.hh index 0dfe53819..cd65978ca 100644 --- a/cpu/o3/2bit_local_pred.hh +++ b/cpu/o3/2bit_local_pred.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh index f70793aaa..78ad5f7d8 100644 --- a/cpu/o3/alpha_cpu.hh +++ b/cpu/o3/alpha_cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -87,7 +87,8 @@ class AlphaFullCPU : public FullO3CPU<Impl> virtual Status status() const { return thread->status(); } - virtual void setStatus(Status new_status) { thread->setStatus(new_status); } + virtual void setStatus(Status new_status) + { thread->setStatus(new_status); } /// Set the status to Active. Optional delay indicates number of /// cycles to wait before beginning execution. @@ -168,12 +169,15 @@ class AlphaFullCPU : public FullO3CPU<Impl> virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); // @todo: Figure out where these store cond failures should go. 
- virtual unsigned readStCondFailures() { return thread->storeCondFailures; } + virtual unsigned readStCondFailures() + { return thread->storeCondFailures; } - virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; } + virtual void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } #if FULL_SYSTEM - virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); } + virtual bool inPalMode() + { return TheISA::PcPAL(cpu->readPC(thread->tid)); } #endif // Only really makes sense for old CPU model. Lots of code @@ -194,10 +198,6 @@ class AlphaFullCPU : public FullO3CPU<Impl> #endif }; -// friend class AlphaXC; - -// std::vector<ExecContext *> xcProxies; - #if FULL_SYSTEM /** ITB pointer. */ AlphaITB *itb; diff --git a/cpu/o3/bpred_unit.cc b/cpu/o3/bpred_unit.cc index a78dcf463..92344111f 100644 --- a/cpu/o3/bpred_unit.cc +++ b/cpu/o3/bpred_unit.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/cpu/o3/bpred_unit.hh b/cpu/o3/bpred_unit.hh index ee7ffc183..b7814b2e9 100644 --- a/cpu/o3/bpred_unit.hh +++ b/cpu/o3/bpred_unit.hh @@ -43,12 +43,7 @@ /** * Basically a wrapper class to hold both the branch predictor - * and the BTB. Right now I'm unsure of the implementation; it would - * be nicer to have something closer to the CPUPolicy or the Impl where - * this is just typedefs, but it forces the upper level stages to be - * aware of the constructors of the BP and the BTB. The nicer thing - * to do is have this templated on the Impl, accept the usual Params - * object, and be able to call the constructors on the BP and BTB. + * and the BTB. 
*/ template<class Impl> class TwobitBPredUnit diff --git a/cpu/o3/bpred_unit_impl.hh b/cpu/o3/bpred_unit_impl.hh index d20b31e55..c37df606b 100644 --- a/cpu/o3/bpred_unit_impl.hh +++ b/cpu/o3/bpred_unit_impl.hh @@ -26,13 +26,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <list> +#include <vector> + #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/bpred_unit.hh" -#include <vector> -#include <list> - using namespace std; template<class Impl> diff --git a/cpu/o3/comm.hh b/cpu/o3/comm.hh index 1a8f394ca..c36c58d3d 100644 --- a/cpu/o3/comm.hh +++ b/cpu/o3/comm.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -169,10 +169,6 @@ struct TimeBufStruct { bool commitInsts; InstSeqNum squashSeqNum; - // Extra bit of information so that the LDSTQ only updates when it - // needs to. - bool commitIsLoad; - // Communication specifically to the IQ to tell the IQ that it can // schedule a non-speculative instruction. InstSeqNum nonSpecSeqNum; diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index 73eccd2b0..66abf8dc6 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -30,10 +30,10 @@ #define __CPU_O3_COMMIT_HH__ #include "arch/faults.hh" -#include "cpu/inst_seq.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/exetrace.hh" +#include "cpu/inst_seq.hh" #include "mem/memory_interface.hh" template <class> @@ -59,8 +59,7 @@ class O3ThreadState; * squashing instruction's sequence number, and only broadcasting a * redirect if it corresponds to an older instruction. Commit also * supports multiple cycle squashing, to model a ROB that can only - * remove a certain number of instructions per cycle. Eventually traps - * and interrupts will most likely be handled here as well. 
+ * remove a certain number of instructions per cycle. */ template<class Impl> class DefaultCommit diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 170f5b01f..346a8bc1c 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -27,12 +27,7 @@ */ #include <algorithm> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <iomanip> -#include <stdio.h> -#include <string.h> +#include <string> #include "base/loader/symtab.hh" #include "base/timebuf.hh" @@ -835,58 +830,6 @@ DefaultCommit<Impl>::commitInsts() unsigned num_committed = 0; DynInstPtr head_inst; -#if FULL_SYSTEM - // Not the best way to check if the front end is empty, but it should - // work. - // @todo: Try to avoid directly accessing fetch. - if (commitStatus[0] == FetchTrapPending && rob->isEmpty()) { - DPRINTF(Commit, "Fault from fetch is pending.\n"); - - fetchTrapWait++; - if (fetchTrapWait > 10000000) { - panic("Fetch trap has been pending for a long time!"); - } - if (fetchFaultTick > curTick) { - DPRINTF(Commit, "Not enough cycles since fault, fault will " - "happen on %lli\n", - fetchFaultTick); - cpu->activityThisCycle(); - return; - } else if (iewStage->hasStoresToWB()) { - DPRINTF(Commit, "IEW still has stores to WB. Waiting until " - "they are completed. fetchTrapWait:%i\n", - fetchTrapWait); - cpu->activityThisCycle(); - return; - } else if (cpu->inPalMode(readPC())) { - DPRINTF(Commit, "In pal mode right now. fetchTrapWait:%i\n", - fetchTrapWait); - return; - } else if (fetchStage->getYoungestSN() > youngestSeqNum[0]) { - DPRINTF(Commit, "Waiting for front end to drain. fetchTrapWait:%i\n", - fetchTrapWait); - return; - } - fetchTrapWait = 0; - DPRINTF(Commit, "ROB is empty, handling fetch trap.\n"); - - assert(!thread[0]->inSyscall); - - thread[0]->inSyscall = true; - - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. 
- cpu->trap(fetchFault, 0); - - // Exit state update mode to avoid accidental updating. - thread[0]->inSyscall = false; - - commitStatus[0] = TrapPending; - // Set it up so that we squash next cycle - trapSquash[0] = true; - return; - } -#endif // Commit as many instructions as possible until the commit bandwidth // limit is reached, or it becomes impossible to commit any more. diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh index 3f3f68247..3035b3387 100644 --- a/cpu/o3/decode.hh +++ b/cpu/o3/decode.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,11 +35,11 @@ #include "base/timebuf.hh" /** - * DefaultDecode class handles both single threaded and SMT decode. Its width is - * specified by the parameters; each cycles it tries to decode that many - * instructions. Because instructions are actually decoded when the StaticInst - * is created, this stage does not do much other than check any PC-relative - * branches. + * DefaultDecode class handles both single threaded and SMT + * decode. Its width is specified by the parameters; each cycles it + * tries to decode that many instructions. Because instructions are + * actually decoded when the StaticInst is created, this stage does + * not do much other than check any PC-relative branches. */ template<class Impl> class DefaultDecode diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index a419a8932..2ed7ec6fc 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -39,7 +39,6 @@ DefaultDecode<Impl>::DefaultDecode(Params *params) decodeWidth(params->decodeWidth), numThreads(params->numberOfThreads) { - DPRINTF(Decode, "decodeWidth=%i.\n", decodeWidth); _status = Inactive; for (int i = 0; i < numThreads; ++i) { @@ -249,8 +248,6 @@ template<class Impl> bool DefaultDecode<Impl>::unblock(unsigned tid) { - DPRINTF(Decode, "[tid:%u]: Trying to unblock.\n", tid); - // Decode is done unblocking only if the skid buffer is empty. if (skidBuffer[tid].empty()) { DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid); @@ -261,6 +258,8 @@ DefaultDecode<Impl>::unblock(unsigned tid) return true; } + DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid); + return false; } @@ -318,6 +317,7 @@ DefaultDecode<Impl>::squash(unsigned tid) // In syscall emulation, we can have both a block and a squash due // to a syscall in the same cycle. This would cause both signals to // be high. This shouldn't happen in full system. + // @todo: Determine if this still happens. 
if (toFetch->decodeBlock[tid]) { toFetch->decodeBlock[tid] = 0; } else { @@ -372,7 +372,7 @@ DefaultDecode<Impl>::skidInsert(unsigned tid) skidBuffer[tid].push(inst); } - // Eventually need to enforce this by not letting a thread + // @todo: Eventually need to enforce this by not letting a thread // fetch past its skidbuffer assert(skidBuffer[tid].size() <= skidBufferMax); } @@ -436,10 +436,10 @@ void DefaultDecode<Impl>::sortInsts() { int insts_from_fetch = fromFetch->size; - +#ifdef DEBUG for (int i=0; i < numThreads; i++) assert(insts[i].empty()); - +#endif for (int i = 0; i < insts_from_fetch; ++i) { insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]); } diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index b03d4afe3..3fcfdc3a1 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ class Sampler; /** - * DefaultFetch class handles both single threaded and SMT fetch. Its width is - * specified by the parameters; each cycle it tries to fetch that many - * instructions. It supports using a branch predictor to predict direction and - * targets. - * It supports the idling functionalitiy of the CPU by indicating to the CPU - * when it is active and inactive. + * DefaultFetch class handles both single threaded and SMT fetch. Its + * width is specified by the parameters; each cycle it tries to fetch + * that many instructions. It supports using a branch predictor to + * predict direction and targets. + * It supports the idling functionalitiy of the CPU by indicating to + * the CPU when it is active and inactive. */ template <class Impl> class DefaultFetch @@ -66,8 +66,8 @@ class DefaultFetch typedef TheISA::ExtMachInst ExtMachInst; public: - /** Overall fetch status. 
Used to determine if the CPU can deschedule itsef - * due to a lack of activity. + /** Overall fetch status. Used to determine if the CPU can + * deschedule itsef due to a lack of activity. */ enum FetchStatus { Active, @@ -174,13 +174,13 @@ class DefaultFetch void wakeFromQuiesce(); private: - /** Changes the status of this stage to active, and indicates this to the - * CPU. + /** Changes the status of this stage to active, and indicates this + * to the CPU. */ inline void switchToActive(); - /** Changes the status of this stage to inactive, and indicates this to the - * CPU. + /** Changes the status of this stage to inactive, and indicates + * this to the CPU. */ inline void switchToInactive(); @@ -373,11 +373,6 @@ class DefaultFetch bool switchedOut; - public: - InstSeqNum &getYoungestSN() { return youngestSN; } - private: - InstSeqNum youngestSN; - #if !FULL_SYSTEM /** Page table pointer. */ // PageTable *pTable; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index 523719945..1c5e508f6 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -938,10 +938,6 @@ DefaultFetch<Impl>::fetch(bool &status_change) DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " "decode.\n",tid); - ////////////////////////// - // Fetch first instruction - ////////////////////////// - // Need to keep track of whether or not a predicted branch // ended this fetch block. bool predicted_branch = false; @@ -1004,7 +1000,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", curTick); + warn("%lli: Quiesce instruction encountered, halting fetch!", + curTick); fetchStatus[tid] = QuiescePending; ++numInst; status_change = true; @@ -1022,24 +1019,20 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Now that fetching is completed, update the PC to signify what the next // cycle will be. 
if (fault == NoFault) { - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); - PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; } else { - // If the issue was an icache miss, then we can just return and - // wait until it is handled. + // We shouldn't be in an icache miss and also have a fault (an ITB + // miss) if (fetchStatus[tid] == IcacheMissStall) { panic("Fetch should have exited prior to this!"); } - // Handle the fault. - // This stage will not be able to continue until all the ROB - // slots are empty, at which point the fault can be handled. - // The only other way it can wake up is if a squash comes along - // and changes the PC. + // Send the fault to commit. This thread will not do anything + // until commit handles the fault. The only other way it can + // wake up is if a squash comes along and changes the PC. #if FULL_SYSTEM assert(numInst != fetchWidth); // Get a sequence number. @@ -1067,20 +1060,12 @@ DefaultFetch<Impl>::fetch(bool &status_change) toDecode->insts[numInst] = instruction; toDecode->size++; - // Tell the commit stage the fault we had. -// toDecode->fetchFault = fault; -// toDecode->fetchFaultSN = cpu->globalSeqNum; - DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid); fetchStatus[tid] = TrapPending; status_change = true; warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); -// cpu->trap(fault); - // Send a signal to the ROB indicating that there's a trap from the - // fetch stage that needs to be handled. Need to indicate that - // there's a fault, and the fault type. #else // !FULL_SYSTEM fatal("fault (%d) detected @ PC %08p", fault, PC[tid]); #endif // FULL_SYSTEM diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh index d5f893e57..a1eeccbe7 100644 --- a/cpu/o3/lsq.hh +++ b/cpu/o3/lsq.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,10 +32,9 @@ #include <map> #include <queue> -#include "base/hashmap.hh" #include "config/full_system.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/cpu_policy.hh" +//#include "cpu/o3/cpu_policy.hh" #include "cpu/o3/lsq_unit.hh" #include "mem/mem_interface.hh" //#include "mem/page_table.hh" @@ -85,7 +84,8 @@ class LSQ { /** Ticks the LSQ. */ void tick(); /** Ticks a specific LSQ Unit. */ - void tick(unsigned tid); + void tick(unsigned tid) + { thread[tid].tick(); } /** Inserts a load into the LSQ. */ void insertLoad(DynInstPtr &load_inst); @@ -95,18 +95,23 @@ class LSQ { /** Executes a load. */ Fault executeLoad(DynInstPtr &inst); - Fault executeLoad(int lq_idx, unsigned tid); + Fault executeLoad(int lq_idx, unsigned tid) + { return thread[tid].executeLoad(lq_idx); } + /** Executes a store. */ Fault executeStore(DynInstPtr &inst); /** * Commits loads up until the given sequence number for a specific thread. */ - void commitLoads(InstSeqNum &youngest_inst, unsigned tid); + void commitLoads(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitLoads(youngest_inst); } + /** * Commits stores up until the given sequence number for a specific thread. */ - void commitStores(InstSeqNum &youngest_inst, unsigned tid); + void commitStores(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitStores(youngest_inst); } /** * Attempts to write back stores until all cache ports are used or the @@ -119,7 +124,8 @@ class LSQ { /** * Squash instructions from a thread until the specified sequence number. */ - void squash(const InstSeqNum &squashed_num, unsigned tid); + void squash(const InstSeqNum &squashed_num, unsigned tid) + { thread[tid].squash(squashed_num); } /** Returns whether or not there was a memory ordering violation. */ bool violation(); @@ -127,12 +133,14 @@ class LSQ { * Returns whether or not there was a memory ordering violation for a * specific thread. 
*/ - bool violation(unsigned tid); + bool violation(unsigned tid) + { return thread[tid].violation(); } /** Returns if a load is blocked due to the memory system for a specific * thread. */ - bool loadBlocked(unsigned tid); + bool loadBlocked(unsigned tid) + { return thread[tid].loadBlocked(); } bool isLoadBlockedHandled(unsigned tid) { return thread[tid].isLoadBlockedHandled(); } @@ -141,10 +149,13 @@ class LSQ { { thread[tid].setLoadBlockedHandled(); } /** Gets the instruction that caused the memory ordering violation. */ - DynInstPtr getMemDepViolator(unsigned tid); + DynInstPtr getMemDepViolator(unsigned tid) + { return thread[tid].getMemDepViolator(); } /** Returns the head index of the load queue for a specific thread. */ - int getLoadHead(unsigned tid); + int getLoadHead(unsigned tid) + { return thread[tid].getLoadHead(); } + /** Returns the sequence number of the head of the load queue. */ InstSeqNum getLoadHeadSeqNum(unsigned tid) { @@ -152,7 +163,9 @@ class LSQ { } /** Returns the head index of the store queue. */ - int getStoreHead(unsigned tid); + int getStoreHead(unsigned tid) + { return thread[tid].getStoreHead(); } + /** Returns the sequence number of the head of the store queue. */ InstSeqNum getStoreHeadSeqNum(unsigned tid) { @@ -162,22 +175,26 @@ class LSQ { /** Returns the number of instructions in all of the queues. */ int getCount(); /** Returns the number of instructions in the queues of one thread. */ - int getCount(unsigned tid); + int getCount(unsigned tid) + { return thread[tid].getCount(); } /** Returns the total number of loads in the load queue. */ int numLoads(); /** Returns the total number of loads for a single thread. */ - int numLoads(unsigned tid); + int numLoads(unsigned tid) + { return thread[tid].numLoads(); } /** Returns the total number of stores in the store queue. */ int numStores(); /** Returns the total number of stores for a single thread. 
*/ - int numStores(unsigned tid); + int numStores(unsigned tid) + { return thread[tid].numStores(); } /** Returns the total number of loads that are ready. */ int numLoadsReady(); /** Returns the number of loads that are ready for a single thread. */ - int numLoadsReady(unsigned tid); + int numLoadsReady(unsigned tid) + { return thread[tid].numLoadsReady(); } /** Returns the number of free entries. */ unsigned numFreeEntries(); @@ -215,24 +232,30 @@ class LSQ { /** Returns whether or not there are any stores to write back to memory. */ bool hasStoresToWB(); + /** Returns whether or not a specific thread has any stores to write back * to memory. */ - bool hasStoresToWB(unsigned tid); + bool hasStoresToWB(unsigned tid) + { return thread[tid].hasStoresToWB(); } + /** Returns the number of stores a specific thread has to write back. */ - int numStoresToWB(unsigned tid); + int numStoresToWB(unsigned tid) + { return thread[tid].numStoresToWB(); } /** Returns if the LSQ will write back to memory this cycle. */ bool willWB(); /** Returns if the LSQ of a specific thread will write back to memory this * cycle. */ - bool willWB(unsigned tid); + bool willWB(unsigned tid) + { return thread[tid].willWB(); } /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ - void dumpInsts(unsigned tid); + void dumpInsts(unsigned tid) + { thread[tid].dumpInsts(); } /** Executes a read operation, using the load specified at the load index. */ template <class T> diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh index c43c19619..a6ad27522 100644 --- a/cpu/o3/lsq_impl.hh +++ b/cpu/o3/lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <algorithm> +#include <string> + #include "cpu/o3/lsq.hh" using namespace std; @@ -89,7 +92,7 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries+1, maxSQEntries+1, tid); + thread[tid].init(params, maxLQEntries, maxSQEntries, tid); } } @@ -228,13 +231,6 @@ LSQ<Impl>::tick() template<class Impl> void -LSQ<Impl>::tick(unsigned tid) -{ - thread[tid].tick(); -} - -template<class Impl> -void LSQ<Impl>::insertLoad(DynInstPtr &load_inst) { unsigned tid = load_inst->threadNumber; @@ -262,13 +258,6 @@ LSQ<Impl>::executeLoad(DynInstPtr &inst) template<class Impl> Fault -LSQ<Impl>::executeLoad(int lq_idx, unsigned tid) -{ - return thread[tid].executeLoad(lq_idx); -} - -template<class Impl> -Fault LSQ<Impl>::executeStore(DynInstPtr &inst) { unsigned tid = inst->threadNumber; @@ -278,20 +267,6 @@ LSQ<Impl>::executeStore(DynInstPtr &inst) template<class Impl> void -LSQ<Impl>::commitLoads(InstSeqNum &youngest_inst,unsigned tid) -{ - thread[tid].commitLoads(youngest_inst); -} - -template<class Impl> -void -LSQ<Impl>::commitStores(InstSeqNum &youngest_inst,unsigned tid) -{ - thread[tid].commitStores(youngest_inst); -} - -template<class Impl> -void LSQ<Impl>::writebackStores() { list<unsigned>::iterator active_threads = (*activeThreads).begin(); @@ -300,8 +275,8 @@ LSQ<Impl>::writebackStores() unsigned tid = *active_threads++; if (numStoresToWB(tid) > 0) { - DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores available" - " for Writeback.\n", tid, numStoresToWB(tid)); + DPRINTF(Writeback,"[tid:%i] Writing back stores. 
%i stores " + "available for Writeback.\n", tid, numStoresToWB(tid)); } thread[tid].writebackStores(); @@ -309,20 +284,6 @@ LSQ<Impl>::writebackStores() } template<class Impl> -int -LSQ<Impl>::numStoresToWB(unsigned tid) -{ - return thread[tid].numStoresToWB(); -} - -template<class Impl> -void -LSQ<Impl>::squash(const InstSeqNum &squashed_num, unsigned tid) -{ - thread[tid].squash(squashed_num); -} - -template<class Impl> bool LSQ<Impl>::violation() { @@ -339,41 +300,6 @@ LSQ<Impl>::violation() } template<class Impl> -bool -LSQ<Impl>::violation(unsigned tid) -{ - return thread[tid].violation(); -} - -template<class Impl> -bool -LSQ<Impl>::loadBlocked(unsigned tid) -{ - return thread[tid].loadBlocked(); -} - -template<class Impl> -typename Impl::DynInstPtr -LSQ<Impl>::getMemDepViolator(unsigned tid) -{ - return thread[tid].getMemDepViolator(); -} - -template<class Impl> -int -LSQ<Impl>::getLoadHead(unsigned tid) -{ - return thread[tid].getLoadHead(); -} - -template<class Impl> -int -LSQ<Impl>::getStoreHead(unsigned tid) -{ - return thread[tid].getStoreHead(); -} - -template<class Impl> int LSQ<Impl>::getCount() { @@ -391,13 +317,6 @@ LSQ<Impl>::getCount() template<class Impl> int -LSQ<Impl>::getCount(unsigned tid) -{ - return thread[tid].getCount(); -} - -template<class Impl> -int LSQ<Impl>::numLoads() { unsigned total = 0; @@ -414,13 +333,6 @@ LSQ<Impl>::numLoads() template<class Impl> int -LSQ<Impl>::numLoads(unsigned tid) -{ - return thread[tid].numLoads(); -} - -template<class Impl> -int LSQ<Impl>::numStores() { unsigned total = 0; @@ -437,13 +349,6 @@ LSQ<Impl>::numStores() template<class Impl> int -LSQ<Impl>::numStores(unsigned tid) -{ - return thread[tid].numStores(); -} - -template<class Impl> -int LSQ<Impl>::numLoadsReady() { unsigned total = 0; @@ -459,13 +364,6 @@ LSQ<Impl>::numLoadsReady() } template<class Impl> -int -LSQ<Impl>::numLoadsReady(unsigned tid) -{ - return thread[tid].numLoadsReady(); -} - -template<class Impl> unsigned 
LSQ<Impl>::numFreeEntries() { @@ -612,14 +510,6 @@ LSQ<Impl>::hasStoresToWB() return true; } - -template<class Impl> -bool -LSQ<Impl>::hasStoresToWB(unsigned tid) -{ - return thread[tid].hasStoresToWB(); -} - template<class Impl> bool LSQ<Impl>::willWB() @@ -636,13 +526,6 @@ LSQ<Impl>::willWB() } template<class Impl> -bool -LSQ<Impl>::willWB(unsigned tid) -{ - return thread[tid].willWB(); -} - -template<class Impl> void LSQ<Impl>::dumpInsts() { @@ -653,10 +536,3 @@ LSQ<Impl>::dumpInsts() thread[tid].dumpInsts(); } } - -template<class Impl> -void -LSQ<Impl>::dumpInsts(unsigned tid) -{ - thread[tid].dumpInsts(); -} diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index 623dbdb4b..942b4583d 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,29 +29,30 @@ #ifndef __CPU_O3_LSQ_UNIT_HH__ #define __CPU_O3_LSQ_UNIT_HH__ +#include <algorithm> #include <map> #include <queue> -#include <algorithm> +#include "arch/faults.hh" #include "config/full_system.hh" #include "base/hashmap.hh" #include "cpu/inst_seq.hh" #include "mem/mem_interface.hh" //#include "mem/page_table.hh" -#include "sim/debug.hh" -#include "sim/sim_object.hh" -#include "arch/faults.hh" +//#include "sim/debug.hh" +//#include "sim/sim_object.hh" /** - * Class that implements the actual LQ and SQ for each specific thread. - * Both are circular queues; load entries are freed upon committing, while - * store entries are freed once they writeback. The LSQUnit tracks if there - * are memory ordering violations, and also detects partial load to store - * forwarding cases (a store only has part of a load's data) that requires - * the load to wait until the store writes back. 
In the former case it - * holds onto the instruction until the dependence unit looks at it, and - * in the latter it stalls the LSQ until the store writes back. At that - * point the load is replayed. + * Class that implements the actual LQ and SQ for each specific + * thread. Both are circular queues; load entries are freed upon + * committing, while store entries are freed once they writeback. The + * LSQUnit tracks if there are memory ordering violations, and also + * detects partial load to store forwarding cases (a store only has + * part of a load's data) that requires the load to wait until the + * store writes back. In the former case it holds onto the instruction + * until the dependence unit looks at it, and in the latter it stalls + * the LSQ until the store writes back. At that point the load is + * replayed. */ template <class Impl> class LSQUnit { @@ -76,21 +77,19 @@ class LSQUnit { /** Returns the description of this event. */ const char *description(); - private: - /** The store index of the store being written back. */ - int storeIdx; /** The writeback event for the store. Needed for store * conditionals. */ - public: Event *wbEvent; + + private: + /** The store index of the store being written back. */ + int storeIdx; private: /** The pointer to the LSQ unit that issued the store. */ LSQUnit<Impl> *lsqPtr; }; - friend class StoreCompletionEvent; - public: /** Constructs an LSQ unit. init() must be called prior to use. */ LSQUnit(); @@ -136,14 +135,12 @@ class LSQUnit { /** Executes a load instruction. */ Fault executeLoad(DynInstPtr &inst); - Fault executeLoad(int lq_idx); + Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } /** Executes a store instruction. */ Fault executeStore(DynInstPtr &inst); /** Commits the head load. */ void commitLoad(); - /** Commits a specific load, given by the sequence number. */ - void commitLoad(InstSeqNum &inst); /** Commits loads older than a specific sequence number. 
*/ void commitLoads(InstSeqNum &youngest_inst); @@ -179,9 +176,7 @@ class LSQUnit { /** Returns the memory ordering violator. */ DynInstPtr getMemDepViolator(); - /** Returns if a load became blocked due to the memory system. It clears - * the bool's value upon this being called. - */ + /** Returns if a load became blocked due to the memory system. */ bool loadBlocked() { return isLoadBlocked; } @@ -215,9 +210,6 @@ class LSQUnit { /** Returns if the SQ is full. */ bool sqFull() { return stores >= (SQEntries - 1); } - /** Debugging function to dump instructions in the LSQ. */ - void dumpInsts(); - /** Returns the number of instructions in the LSQ. */ unsigned getCount() { return loads + stores; } @@ -245,6 +237,10 @@ class LSQUnit { /** Decrements the given load index (circular queue). */ inline void decrLdIdx(int &load_idx); + public: + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + private: /** Pointer to the CPU. */ FullCPU *cpu; @@ -287,38 +283,29 @@ class LSQUnit { /** Whether or not the store is completed. */ bool completed; }; -/* - enum Status { - Running, - Idle, - DcacheMissStall, - DcacheMissSwitch - }; -*/ + private: /** The LSQUnit thread id. */ unsigned lsqID; - /** The status of the LSQ unit. */ -// Status _status; - /** The store queue. */ std::vector<SQEntry> storeQueue; /** The load queue. */ std::vector<DynInstPtr> loadQueue; - // Consider making these 16 bits - /** The number of LQ entries. */ + /** The number of LQ entries, plus a sentinel entry (circular queue). + * @todo: Consider having var that records the true number of LQ entries. + */ unsigned LQEntries; - /** The number of SQ entries. */ + /** The number of SQ entries, plus a sentinel entry (circular queue). + * @todo: Consider having var that records the true number of SQ entries. + */ unsigned SQEntries; /** The number of load instructions in the LQ. 
*/ int loads; - /** The number of store instructions in the SQ (excludes those waiting to - * writeback). - */ + /** The number of store instructions in the SQ. */ int stores; /** The number of store instructions in the SQ waiting to writeback. */ int storesToWB; @@ -330,8 +317,8 @@ class LSQUnit { /** The index of the head instruction in the SQ. */ int storeHead; - /** The index of the first instruction that is ready to be written back, - * and has not yet been written back. + /** The index of the first instruction that may be ready to be + * written back, and has not yet been written back. */ int storeWBIdx; /** The index of the tail instruction in the SQ. */ @@ -348,13 +335,9 @@ class LSQUnit { //list<InstSeqNum> mshrSeqNums; - //Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; - /** Wire to read information from the issue stage time queue. */ typename TimeBuffer<IssueStruct>::wire fromIssue; - // Make these per thread? /** Whether or not the LSQ is stalled. */ bool stalled; /** The store that causes the stall due to partial store to load @@ -364,20 +347,13 @@ class LSQUnit { /** The index of the above store. */ int stallingLoadIdx; - /** Whether or not a load is blocked due to the memory system. It is - * cleared when this value is checked via loadBlocked(). - */ + /** Whether or not a load is blocked due to the memory system. */ bool isLoadBlocked; bool loadBlockedHandled; InstSeqNum blockedLoadSeqNum; - /** The oldest faulting load instruction. */ - DynInstPtr loadFaultInst; - /** The oldest faulting store instruction. */ - DynInstPtr storeFaultInst; - /** The oldest load that caused a memory ordering violation. 
*/ DynInstPtr memDepViolator; @@ -447,23 +423,14 @@ template <class T> Fault LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) { - //Depending on issue2execute delay a squashed load could - //execute if it is found to be squashed in the same - //cycle it is scheduled to execute assert(loadQueue[load_idx]); - if (loadQueue[load_idx]->isExecuted()) { - panic("Should not reach this point with split ops!"); - memcpy(&data,req->data,req->size); - - return NoFault; - } + assert(!loadQueue[load_idx]->isExecuted()); // Make sure this isn't an uncacheable access // A bit of a hackish way to get uncached accesses to work only if they're // at the head of the LSQ and are ready to commit (at the head of the ROB // too). - // @todo: Fix uncached accesses. if (req->flags & UNCACHEABLE && (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) { iewStage->rescheduleMemInst(loadQueue[load_idx]); @@ -479,12 +446,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) "storeHead: %i addr: %#x\n", load_idx, store_idx, storeHead, req->paddr); -#ifdef FULL_SYSTEM +#if 0 if (req->flags & LOCKED) { cpu->lockAddr = req->paddr; cpu->lockFlag = true; } #endif + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; while (store_idx != -1) { // End once we've reached the top of the LSQ @@ -518,18 +489,14 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) // If the store's data has all of the data needed, we can forward. if (store_has_lower_limit && store_has_upper_limit) { - + // Get shift amount for offset into the store's data. int shift_amt = req->vaddr & (store_size - 1); - // Assumes byte addressing + // @todo: Magic number, assumes byte addressing shift_amt = shift_amt << 3; // Cast this to type T? 
data = storeQueue[store_idx].data >> shift_amt; - req->cmd = Read; - assert(!req->completionEvent); - req->completionEvent = NULL; - req->time = curTick; assert(!req->data); req->data = new uint8_t[64]; @@ -579,7 +546,6 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) // Do not generate a writeback event as this instruction is not // complete. - DPRINTF(LSQUnit, "Load-store forwarding mis-match. " "Store idx %i to load addr %#x\n", store_idx, req->vaddr); @@ -588,16 +554,13 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) } } - // If there's no forwarding case, then go access memory DynInstPtr inst = loadQueue[load_idx]; - DPRINTF(LSQUnit, "Doing functional access for inst PC %#x\n", - loadQueue[load_idx]->readPC()); + DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC()); + assert(!req->data); - req->cmd = Read; - req->completionEvent = NULL; - req->time = curTick; req->data = new uint8_t[64]; Fault fault = cpu->read(req, data); memcpy(req->data, &data, sizeof(T)); @@ -611,20 +574,19 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) return NoFault; + // Record that the load was blocked due to memory. This + // load will squash all instructions after it, be + // refetched, and re-executed. isLoadBlocked = true; loadBlockedHandled = false; blockedLoadSeqNum = inst->seqNum; // No fault occurred, even though the interface is blocked. 
return NoFault; } + DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", loadQueue[load_idx]->readPC()); -/* - Addr debug_addr = ULL(0xfffffc0000be81a8); - if (req->vaddr == debug_addr) { - debug_break(); - } -*/ + assert(!req->completionEvent); req->completionEvent = new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); @@ -632,75 +594,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) assert(dcacheInterface->doEvents()); - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. if (result != MA_HIT) { DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", inst->seqNum); - - lastDcacheStall = curTick; - -// _status = DcacheMissStall; - } else { - DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", - inst->seqNum); - DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); - } - } -#if 0 - // if we have a cache, do cache access too - if (dcacheInterface) { - if (dcacheInterface->isBlocked()) { - isLoadBlocked = true; - // No fault occurred, even though the interface is blocked. - return NoFault; - } - - DPRINTF(LSQUnit, "LSQUnit: D-cache: PC:%#x reading from paddr:%#x " - "vaddr:%#x flags:%i\n", - inst->readPC(), req->paddr, req->vaddr, req->flags); - - // Setup MemReq pointer - req->cmd = Read; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - - assert(!req->completionEvent); - req->completionEvent = - new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); - - // Do Cache Access - MemAccessResult result = dcacheInterface->access(req); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- // @todo: Probably should support having no events - if (result != MA_HIT) { - DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - inst->seqNum); - - lastDcacheStall = curTick; - - _status = DcacheMissStall; - - } else { DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", inst->seqNum); - - DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); } - } else { - fatal("Must use D-cache with new memory system"); } -#endif return fault; } @@ -716,24 +619,11 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx) " | storeHead:%i [sn:%i]\n", store_idx, req->paddr, data, storeHead, storeQueue[store_idx].inst->seqNum); -/* - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { - req->result = 2; - } else { - req->result = 1; - } - } -*/ + storeQueue[store_idx].req = req; storeQueue[store_idx].size = sizeof(T); storeQueue[store_idx].data = data; -/* - Addr debug_addr = ULL(0xfffffc0000be81a8); - if (req->vaddr == debug_addr) { - debug_break(); - } -*/ + // This function only writes the data to the store queue, so no fault // can happen here. return NoFault; diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index dca808ac9..f0b4405ed 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -35,8 +35,8 @@ LSQUnit<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit<Impl> *lsq_ptr) : Event(&mainEventQueue), - storeIdx(store_idx), wbEvent(wb_event), + storeIdx(store_idx), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); @@ -86,15 +86,13 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, lsqID = id; - LQEntries = maxLQEntries; - SQEntries = maxSQEntries; + // Add 1 for the sentinel entry (they are circular queues). 
+ LQEntries = maxLQEntries + 1; + SQEntries = maxSQEntries + 1; loadQueue.resize(LQEntries); storeQueue.resize(SQEntries); - - // May want to initialize these entries to NULL - loadHead = loadTail = 0; storeHead = storeWBIdx = storeTail = 0; @@ -104,7 +102,7 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, dcacheInterface = params->dcacheInterface; - loadFaultInst = storeFaultInst = memDepViolator = NULL; + memDepViolator = NULL; blockedLoadSeqNum = 0; } @@ -152,6 +150,8 @@ LSQUnit<Impl>::switchOut() for (int i = 0; i < loadQueue.size(); ++i) loadQueue[i] = NULL; + assert(storesToWB == 0); + while (storesToWB > 0 && storeWBIdx != storeTail && storeQueue[storeWBIdx].inst && @@ -218,7 +218,7 @@ LSQUnit<Impl>::takeOverFrom() usedPorts = 0; - loadFaultInst = storeFaultInst = memDepViolator = NULL; + memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -231,16 +231,17 @@ template<class Impl> void LSQUnit<Impl>::resizeLQ(unsigned size) { - assert( size >= LQEntries); + unsigned size_plus_sentinel = size + 1; + assert(size_plus_sentinel >= LQEntries); - if (size > LQEntries) { - while (size > loadQueue.size()) { + if (size_plus_sentinel > LQEntries) { + while (size_plus_sentinel > loadQueue.size()) { DynInstPtr dummy; loadQueue.push_back(dummy); LQEntries++; } } else { - LQEntries = size; + LQEntries = size_plus_sentinel; } } @@ -249,14 +250,15 @@ template<class Impl> void LSQUnit<Impl>::resizeSQ(unsigned size) { - if (size > SQEntries) { - while (size > storeQueue.size()) { + unsigned size_plus_sentinel = size + 1; + if (size_plus_sentinel > SQEntries) { + while (size_plus_sentinel > storeQueue.size()) { SQEntry dummy; storeQueue.push_back(dummy); SQEntries++; } } else { - SQEntries = size; + SQEntries = size_plus_sentinel; } } @@ -264,10 +266,8 @@ template <class Impl> void LSQUnit<Impl>::insert(DynInstPtr &inst) { - // Make sure we really have a memory reference. assert(inst->isMemRef()); - // Make sure it's one of the two classes of memory references. 
assert(inst->isLoad() || inst->isStore()); if (inst->isLoad()) { @@ -283,7 +283,8 @@ template <class Impl> void LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) { - assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries); + assert((loadTail + 1) % LQEntries != loadHead); + assert(loads < LQEntries); DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n", load_inst->readPC(), loadTail, load_inst->seqNum); @@ -322,7 +323,6 @@ LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) incrStIdx(storeTail); ++stores; - } template <class Impl> @@ -370,39 +370,6 @@ LSQUnit<Impl>::numLoadsReady() return retval; } -#if 0 -template <class Impl> -Fault -LSQUnit<Impl>::executeLoad() -{ - Fault load_fault = NoFault; - DynInstPtr load_inst; - - assert(readyLoads.size() != 0); - - // Execute a ready load. - LdMapIt ready_it = readyLoads.begin(); - - load_inst = (*ready_it).second; - - // Execute the instruction, which is held in the data portion of the - // iterator. - load_fault = load_inst->execute(); - - // If it executed successfully, then switch it over to the executed - // loads list. - if (load_fault == NoFault) { - executedLoads[load_inst->seqNum] = load_inst; - - readyLoads.erase(ready_it); - } else { - loadFaultInst = load_inst; - } - - return load_fault; -} -#endif - template <class Impl> Fault LSQUnit<Impl>::executeLoad(DynInstPtr &inst) @@ -413,33 +380,14 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", inst->readPC(),inst->seqNum); - // Make sure it's really in the list. - // Normally it should always be in the list. However, - /* due to a syscall it may not be the list. 
-#ifdef DEBUG - int i = loadHead; - while (1) { - if (i == loadTail && !find(inst)) { - assert(0 && "Load not in the queue!"); - } else if (loadQueue[i] == inst) { - break; - } - - i = i + 1; - if (i >= LQEntries) { - i = 0; - } - } -#endif // DEBUG*/ - // load_fault = inst->initiateAcc(); load_fault = inst->execute(); // If the instruction faulted, then we need to send it along to commit // without the instruction completing. if (load_fault != NoFault) { - // Maybe just set it as can commit here, although that might cause - // some other problems with sending traps to the ROB too quickly. + // Send this instruction to commit, also make sure iew stage + // realizes there is activity. iewStage->instToCommit(inst); iewStage->activityThisCycle(); } @@ -449,20 +397,6 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) template <class Impl> Fault -LSQUnit<Impl>::executeLoad(int lq_idx) -{ - // Very hackish. Not sure the best way to check that this - // instruction is at the head of the ROB. I should have some sort - // of extra information here so that I'm not overloading the - // canCommit signal for 15 different things. - loadQueue[lq_idx]->setCanCommit(); - Fault ret_fault = executeLoad(loadQueue[lq_idx]); - loadQueue[lq_idx]->clearCanCommit(); - return ret_fault; -} - -template <class Impl> -Fault LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) { using namespace TheISA; @@ -481,11 +415,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) Fault store_fault = store_inst->initiateAcc(); // Fault store_fault = store_inst->execute(); - // Store size should now be available. Use it to get proper offset for - // addr comparisons. 
- int size = storeQueue[store_idx].size; - - if (size == 0) { + if (storeQueue[store_idx].size == 0) { DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", store_inst->readPC(),store_inst->seqNum); @@ -494,30 +424,25 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) assert(store_fault == NoFault); - if (!storeFaultInst) { - if (store_fault != NoFault) { - panic("Fault in a store instruction!"); - storeFaultInst = store_inst; - } else if (store_inst->isNonSpeculative()) { - // Nonspeculative accesses (namely store conditionals) - // need to set themselves as able to writeback if we - // haven't had a fault by here. - storeQueue[store_idx].canWB = true; + if (store_inst->isNonSpeculative()) { + // Nonspeculative accesses (namely store conditionals) + // need to set themselves as able to writeback if we + // haven't had a fault by here. + storeQueue[store_idx].canWB = true; - ++storesToWB; - } + ++storesToWB; } if (!memDepViolator) { while (load_idx != loadTail) { - // Actually should only check loads that have actually executed - // Might be safe because effAddr is set to InvalAddr when the - // dyn inst is created. - - // Must actually check all addrs in the proper size range - // Which is more correct than needs to be. What if for now we just - // assume all loads are quad-word loads, and do the addr based - // on that. + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. 
+ // @todo: Fix this, magic number being used here if ((loadQueue[load_idx]->effAddr >> 8) == (store_inst->effAddr >> 8)) { @@ -557,32 +482,6 @@ LSQUnit<Impl>::commitLoad() template <class Impl> void -LSQUnit<Impl>::commitLoad(InstSeqNum &inst) -{ - // Hopefully I don't use this function too much - panic("Don't use this function!"); - - int i = loadHead; - while (1) { - if (i == loadTail) { - assert(0 && "Load not in the queue!"); - } else if (loadQueue[i]->seqNum == inst) { - break; - } - - ++i; - if (i >= LQEntries) { - i = 0; - } - } - - loadQueue[i]->removeInLSQ(); - loadQueue[i] = NULL; - --loads; -} - -template <class Impl> -void LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) { assert(loads == 0 || loadQueue[loadHead]); @@ -602,6 +501,8 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) while (store_idx != storeTail) { assert(storeQueue[store_idx].inst); + // Mark any stores that are now committed and have not yet + // been marked as able to write back. if (!storeQueue[store_idx].canWB) { if (storeQueue[store_idx].inst->seqNum > youngest_inst) { break; @@ -613,7 +514,6 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) storeQueue[store_idx].canWB = true; -// --stores; ++storesToWB; } @@ -631,6 +531,8 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { + // Store didn't write any data so no need to write it back to + // memory. if (storeQueue[storeWBIdx].size == 0) { completeStore(storeWBIdx); @@ -659,7 +561,6 @@ LSQUnit<Impl>::writebackStores() MemReqPtr req = storeQueue[storeWBIdx].req; storeQueue[storeWBIdx].committed = true; -// Fault fault = cpu->translateDataWriteReq(req); req->cmd = Write; req->completionEvent = NULL; req->time = curTick; @@ -689,6 +590,12 @@ LSQUnit<Impl>::writebackStores() default: panic("Unexpected store size!\n"); } + + // Stores other than store conditionals are completed at this + // time. 
Mark them as completed and, if we have a checker, + // tell it that the instruction is completed. + // @todo: Figure out what time I can say stores are complete in + // the timing memory. if (!(req->flags & LOCKED)) { storeQueue[storeWBIdx].inst->setCompleted(); if (cpu->checker) { @@ -714,57 +621,35 @@ LSQUnit<Impl>::writebackStores() iewStage->replayMemInst(loadQueue[stallingLoadIdx]); } - if (result != MA_HIT && dcacheInterface->doEvents()) { - typename IEW::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { - // Stx_C should not generate a system port transaction, - // but that might be hard to accomplish. - wb = new typename - IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, + typename IEW::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C should not generate a system port transaction + // if it misses in the cache, but that might be hard + // to accomplish without explicit cache support. + wb = new typename + IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, iewStage); - store_event->wbEvent = wb; - } + store_event->wbEvent = wb; + } - DPRINTF(LSQUnit,"D-Cache Write Miss!\n"); + if (result != MA_HIT && dcacheInterface->doEvents()) { + DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", + storeWBIdx); DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", storeQueue[storeWBIdx].inst->seqNum); - lastDcacheStall = curTick; - -// _status = DcacheMissStall; - //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size()); - // Increment stat here or something + // @todo: Increment stat here. } else { DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n", storeWBIdx); DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", storeQueue[storeWBIdx].inst->seqNum); - - - if (req->flags & LOCKED) { - // Stx_C does not generate a system port transaction. 
-/* - if (req->flags & UNCACHEABLE) { - req->result = 2; - } else { - if (cpu->lockFlag && cpu->lockAddr == req->paddr) { - req->result=1; - } else { - req->result = 0; - } - } -*/ - typename IEW::LdWritebackEvent *wb = - new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, - iewStage); - store_event->wbEvent = wb; - } } incrStIdx(storeWBIdx); @@ -798,14 +683,12 @@ void LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) { DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" - "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); int load_idx = loadTail; decrLdIdx(load_idx); while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { - - // Clear the smart pointer to make sure it is decremented. DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, " "[sn:%lli]\n", loadQueue[load_idx]->readPC(), @@ -817,6 +700,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) stallingLoadIdx = 0; } + // Clear the smart pointer to make sure it is decremented. loadQueue[load_idx]->squashed = true; loadQueue[load_idx] = NULL; --loads; @@ -840,19 +724,18 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) { - + // Instructions marked as can WB are already committed. if (storeQueue[store_idx].canWB) { break; } - // Clear the smart pointer to make sure it is decremented. DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, " "idx:%i [sn:%lli]\n", storeQueue[store_idx].inst->readPC(), store_idx, storeQueue[store_idx].inst->seqNum); - // I don't think this can happen. It should have been cleared by the - // stalling load. + // I don't think this can happen. It should have been cleared + // by the stalling load. 
if (isStalled() && storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { panic("Is stalled should have been cleared by stalling load!\n"); @@ -860,13 +743,17 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) stallingStoreIsn = 0; } + // Clear the smart pointer to make sure it is decremented. storeQueue[store_idx].inst->squashed = true; storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; if (storeQueue[store_idx].req) { + // There should not be a completion event if the store has + // not yet committed. assert(!storeQueue[store_idx].req->completionEvent); } + storeQueue[store_idx].req = NULL; --stores; @@ -879,36 +766,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) template <class Impl> void -LSQUnit<Impl>::dumpInsts() -{ - cprintf("Load store queue: Dumping instructions.\n"); - cprintf("Load queue size: %i\n", loads); - cprintf("Load queue: "); - - int load_idx = loadHead; - - while (load_idx != loadTail && loadQueue[load_idx]) { - cprintf("%#x ", loadQueue[load_idx]->readPC()); - - incrLdIdx(load_idx); - } - - cprintf("Store queue size: %i\n", stores); - cprintf("Store queue: "); - - int store_idx = storeHead; - - while (store_idx != storeTail && storeQueue[store_idx].inst) { - cprintf("%#x ", storeQueue[store_idx].inst->readPC()); - - incrStIdx(store_idx); - } - - cprintf("\n"); -} - -template <class Impl> -void LSQUnit<Impl>::completeStore(int store_idx) { assert(storeQueue[store_idx].inst); @@ -930,7 +787,9 @@ LSQUnit<Impl>::completeStore(int store_idx) iewStage->updateLSQNextCycle = true; } - DPRINTF(LSQUnit, "Store head idx:%i\n", storeHead); + DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " + "idx:%i\n", + storeQueue[store_idx].inst->seqNum, store_idx, storeHead); if (isStalled() && storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { @@ -943,6 +802,10 @@ LSQUnit<Impl>::completeStore(int store_idx) } storeQueue[store_idx].inst->setCompleted(); + + // Tell the checker we've completed this 
instruction. Some stores + // may get reported twice to the checker, but the checker can + // handle that case. if (cpu->checker) { cpu->checker->tick(storeQueue[store_idx].inst); } @@ -979,3 +842,33 @@ LSQUnit<Impl>::decrLdIdx(int &load_idx) if (--load_idx < 0) load_idx += LQEntries; } + +template <class Impl> +void +LSQUnit<Impl>::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("%#x ", loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("Store queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("%#x ", storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh index 141e0fdc4..acbe08ec2 100644 --- a/cpu/o3/mem_dep_unit.hh +++ b/cpu/o3/mem_dep_unit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -201,13 +201,6 @@ class MemDepUnit { static int memdep_erase; }; - struct ltMemDepEntry { - bool operator() (const MemDepEntryPtr &lhs, const MemDepEntryPtr &rhs) - { - return lhs->inst->seqNum < rhs->inst->seqNum; - } - }; - /** Finds the memory dependence entry in the hash map. 
*/ inline MemDepEntryPtr &findInHash(const DynInstPtr &inst); diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh index 05a33685d..8b195baab 100644 --- a/cpu/o3/mem_dep_unit_impl.hh +++ b/cpu/o3/mem_dep_unit_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -141,12 +141,12 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry)); MemDepEntry::memdep_insert++; - // Add the instruction to the instruction list. instList[tid].push_back(inst); inst_entry->listIt = --(instList[tid].end()); - // Check the dependence predictor for any producing stores. + // Check any barriers and the dependence predictor for any + // producing stores. InstSeqNum producing_store; if (inst->isLoad() && loadBarrier) { producing_store = loadBarrierSN; @@ -181,7 +181,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) moveToReady(inst_entry); } } else { - // Otherwise make the instruction dependent on the store. + // Otherwise make the instruction dependent on the store/barrier. DPRINTF(MemDepUnit, "Adding to dependency list; " "inst PC %#x is dependent on [sn:%lli].\n", inst->readPC(), producing_store); @@ -193,8 +193,6 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) // Add this instruction to the list of dependents. 
store_entry->dependInsts.push_back(inst_entry); -// inst_entry->producingStore = store_entry; - if (inst->isLoad()) { ++conflictingLoads; } else { @@ -370,8 +368,6 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst) instList[tid].erase((*hash_it).second->listIt); -// (*hash_it).second->inst = NULL; - (*hash_it).second = NULL; memDepHash.erase(hash_it); @@ -416,7 +412,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst) if (!woken_inst->inst) { // Potentially removed mem dep entries could be on this list -// inst_entry->dependInsts[i] = NULL; continue; } @@ -429,7 +424,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst) } else { woken_inst->memDepReady = true; } -// inst_entry->dependInsts[i] = NULL; } inst_entry->dependInsts.clear(); @@ -468,13 +462,7 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num, assert(hash_it != memDepHash.end()); (*hash_it).second->squashed = true; -/* - for (int i = 0; i < (*hash_it).second->dependInsts.size(); ++i) { - (*hash_it).second->dependInsts[i] = NULL; - } - (*hash_it).second->inst = NULL; -*/ (*hash_it).second = NULL; memDepHash.erase(hash_it); diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index dd2cb0c18..3f1a27bb5 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,15 +35,16 @@ #include "base/timebuf.hh" /** - * DefaultRename handles both single threaded and SMT rename. Its width is - * specified by the parameters; each cycle it tries to rename that many - * instructions. It holds onto the rename history of all instructions with - * destination registers, storing the arch. 
register, the new physical - * register, and the old physical register, to allow for undoing of mappings - * if squashing happens, or freeing up registers upon commit. Rename handles - * blocking if the ROB, IQ, or LSQ is going to be full. Rename also handles - * barriers, and does so by stalling on the instruction until the ROB is - * empty and there are no instructions in flight to the ROB. + * DefaultRename handles both single threaded and SMT rename. Its + * width is specified by the parameters; each cycle it tries to rename + * that many instructions. It holds onto the rename history of all + * instructions with destination registers, storing the + * arch. register, the new physical register, and the old physical + * register, to allow for undoing of mappings if squashing happens, or + * freeing up registers upon commit. Rename handles blocking if the + * ROB, IQ, or LSQ is going to be full. Rename also handles barriers, + * and does so by stalling on the instruction until the ROB is empty + * and there are no instructions in flight to the ROB. */ template<class Impl> class DefaultRename @@ -68,14 +69,15 @@ class DefaultRename // Typedefs from the ISA. typedef TheISA::RegIndex RegIndex; - // A deque is used to queue the instructions. Barrier insts must be - // added to the front of the deque, which is the only reason for using - // a deque instead of a queue. (Most other stages use a queue) + // A list is used to queue the instructions. Barrier insts must + // be added to the front of the list, which is the only reason for + // using a list instead of a queue. (Most other stages use a + // queue) typedef std::list<DynInstPtr> InstQueue; public: - /** Overall rename status. Used to determine if the CPU can deschedule - * itself due to a lack of activity. + /** Overall rename status. Used to determine if the CPU can + * deschedule itself due to a lack of activity. 
*/ enum RenameStatus { Active, diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index db4bb2ffe..081581c92 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -209,17 +209,13 @@ template <class Impl> void DefaultRename<Impl>::initStage() { + // Grab the number of free entries directly from the stages. for (int tid=0; tid < numThreads; tid++) { freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid); freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid); freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid); emptyROB[tid] = true; } - - // Clear these pointers so they are not accidentally used in - // non-initialization code. -// iew_ptr = NULL; -// commit_ptr = NULL; } template<class Impl> @@ -299,6 +295,7 @@ DefaultRename<Impl>::takeOverFrom() _status = Inactive; initStage(); + // Reset all state prior to taking over from the other CPU. for (int i=0; i< numThreads; i++) { renameStatus[i] = Idle; @@ -326,7 +323,7 @@ DefaultRename<Impl>::squash(unsigned tid) if (renameStatus[tid] == Blocked || renameStatus[tid] == Unblocking || renameStatus[tid] == SerializeStall) { -#if !FULL_SYSTEM +#if 0 // In syscall emulation, we can have both a block and a squash due // to a syscall in the same cycle. This would cause both signals to // be high. This shouldn't happen in full system. @@ -344,7 +341,7 @@ DefaultRename<Impl>::squash(unsigned tid) // Set the status to Squashing. renameStatus[tid] = Squashing; - // Clear the skid buffer in case it has any data in it. + // Squash any instructions from decode. 
unsigned squashCount = 0; for (int i=0; i<fromDecode->size; i++) { @@ -367,9 +364,6 @@ template <class Impl> void DefaultRename<Impl>::tick() { - // Rename will need to try to rename as many instructions as it - // has bandwidth, unless it is blocked. - wroteToTimeBuffer = false; blockThisCycle = false; @@ -454,8 +448,6 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid) } else if (renameStatus[tid] == Unblocking) { renameInsts(tid); -// ++renameUnblockCycles; - if (validInsts()) { // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. @@ -575,7 +567,6 @@ DefaultRename<Impl>::renameInsts(unsigned tid) insts_to_rename.pop_front(); - //Use skidBuffer with oldest instructions if (renameStatus[tid] == Unblocking) { DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename " "skidBuffer\n", @@ -711,10 +702,10 @@ void DefaultRename<Impl>::sortInsts() { int insts_from_decode = fromDecode->size; - +#ifdef DEBUG for (int i=0; i < numThreads; i++) assert(insts[i].empty()); - +#endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; insts[inst->threadNumber].push_back(inst); @@ -794,8 +785,8 @@ DefaultRename<Impl>::block(unsigned tid) wroteToTimeBuffer = true; } - // Rename can not go from SerializeStall to Blocked, otherwise it would - // not know to complete the serialize stall. + // Rename can not go from SerializeStall to Blocked, otherwise + // it would not know to complete the serialize stall. if (renameStatus[tid] != SerializeStall) { // Set status to Blocked. renameStatus[tid] = Blocked; @@ -835,15 +826,11 @@ DefaultRename<Impl>::doSquash(unsigned tid) InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; -//#if FULL_SYSTEM -// assert(!historyBuffer[tid].empty()); -//#else // After a syscall squashes everything, the history buffer may be empty // but the ROB may still be squashing instructions. 
if (historyBuffer[tid].empty()) { return; } -//#endif // FULL_SYSTEM // Go through the most recent instructions, undoing the mappings // they did and freeing up the registers. @@ -896,8 +883,8 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid) hb_it != historyBuffer[tid].end() && (*hb_it).instSeqNum <= inst_seq_num) { - DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, sequence" - " number %i.\n", + DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, " + "[sn:%lli].\n", tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum); freeList->addReg((*hb_it).prevPhysReg); diff --git a/cpu/o3/rename_map.cc b/cpu/o3/rename_map.cc index 8ba632e65..fc59058a1 100644 --- a/cpu/o3/rename_map.cc +++ b/cpu/o3/rename_map.cc @@ -32,18 +32,12 @@ using namespace std; -// Todo: Consider making functions inline. Avoid having things that are -// using the zero register or misc registers from adding on the registers -// to the free list. Possibly remove the direct communication between -// this and the freelist. Considering making inline bool functions that -// determine if the register is a logical int, logical fp, physical int, -// physical fp, etc. +// @todo: Consider making inline bool functions that determine if the +// register is a logical int, logical fp, physical int, physical fp, +// etc. SimpleRenameMap::~SimpleRenameMap() { - // Delete the rename maps as they were allocated with new. - //delete [] intRenameMap; - //delete [] floatRenameMap; } void @@ -105,7 +99,8 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs, // Although the index refers purely to architected registers, because // the floating reg indices come after the integer reg indices, they // may exceed the size of a normal RegIndex (short). 
- for (PhysRegIndex index = numLogicalIntRegs; index < numLogicalRegs; ++index) + for (PhysRegIndex index = numLogicalIntRegs; + index < numLogicalRegs; ++index) { floatRenameMap[index].physical_reg = freg_idx++; } @@ -132,14 +127,10 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs, void SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) { - //Setup the interface to the freelist. freeList = fl_ptr; } -// Don't allow this stage to fault; force that check to the rename stage. -// Simply ask to rename a logical register and get back a new physical -// register index. SimpleRenameMap::RenameInfo SimpleRenameMap::rename(RegIndex arch_reg) { @@ -152,13 +143,11 @@ SimpleRenameMap::rename(RegIndex arch_reg) // requested architected register. prev_reg = intRenameMap[arch_reg].physical_reg; - // If it's not referencing the zero register, then mark the register - // as not ready. + // If it's not referencing the zero register, then rename the + // register. if (arch_reg != intZeroReg) { - // Get a free physical register to rename to. renamed_reg = freeList->getIntReg(); - // Update the integer rename map. intRenameMap[arch_reg].physical_reg = renamed_reg; assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); @@ -168,20 +157,15 @@ SimpleRenameMap::rename(RegIndex arch_reg) renamed_reg = intZeroReg; } } else if (arch_reg < numLogicalRegs) { - // Subtract off the base offset for floating point registers. -// arch_reg = arch_reg - numLogicalIntRegs; - // Record the current physical register that is renamed to the // requested architected register. prev_reg = floatRenameMap[arch_reg].physical_reg; - // If it's not referencing the zero register, then mark the register - // as not ready. + // If it's not referencing the zero register, then rename the + // register. if (arch_reg != floatZeroReg) { - // Get a free floating point register to rename to. renamed_reg = freeList->getFloatReg(); - // Update the floating point rename map. 
floatRenameMap[arch_reg].physical_reg = renamed_reg; assert(renamed_reg < numPhysicalRegs && @@ -194,10 +178,10 @@ SimpleRenameMap::rename(RegIndex arch_reg) // Subtract off the base offset for miscellaneous registers. arch_reg = arch_reg - numLogicalRegs; - // No renaming happens to the misc. registers. They are simply the - // registers that come after all the physical registers; thus - // take the base architected register and add the physical registers - // to it. + // No renaming happens to the misc. registers. They are + // simply the registers that come after all the physical + // registers; thus take the base architected register and add + // the physical registers to it. renamed_reg = arch_reg + numPhysicalRegs; // Set the previous register to the same register; mainly it must be @@ -211,17 +195,12 @@ SimpleRenameMap::rename(RegIndex arch_reg) return RenameInfo(renamed_reg, prev_reg); } -//Perhaps give this a pair as a return value, of the physical register -//and whether or not it's ready. PhysRegIndex SimpleRenameMap::lookup(RegIndex arch_reg) { if (arch_reg < numLogicalIntRegs) { return intRenameMap[arch_reg].physical_reg; } else if (arch_reg < numLogicalRegs) { - // Subtract off the base FP offset. -// arch_reg = arch_reg - numLogicalIntRegs; - return floatRenameMap[arch_reg].physical_reg; } else { // Subtract off the misc registers offset. @@ -233,51 +212,23 @@ SimpleRenameMap::lookup(RegIndex arch_reg) } } -// In this implementation the miscellaneous registers do not actually rename, -// so this function does not allow you to try to change their mappings. void SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) { + // In this implementation the miscellaneous registers do not + // actually rename, so this function does not allow you to try to + // change their mappings. 
if (arch_reg < numLogicalIntRegs) { DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", (int)arch_reg, renamed_reg); intRenameMap[arch_reg].physical_reg = renamed_reg; } else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) { - - DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", (int)arch_reg - numLogicalIntRegs, renamed_reg); floatRenameMap[arch_reg].physical_reg = renamed_reg; } - - //assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); -} - -void -SimpleRenameMap::squash(vector<RegIndex> freed_regs, - vector<UnmapInfo> unmaps) -{ - panic("Not sure this function should be called."); - - // Not sure the rename map should be able to access the free list - // like this. - while (!freed_regs.empty()) { - RegIndex free_register = freed_regs.back(); - - if (free_register < numPhysicalIntRegs) { - freeList->addIntReg(free_register); - } else { - // Subtract off the base FP dependence tag. - free_register = free_register - numPhysicalIntRegs; - freeList->addFloatReg(free_register); - } - - freed_regs.pop_back(); - } - - // Take unmap info and roll back the rename map. } int diff --git a/cpu/o3/rename_map.hh b/cpu/o3/rename_map.hh index 3ecbe45c3..d7e49ae83 100644 --- a/cpu/o3/rename_map.hh +++ b/cpu/o3/rename_map.hh @@ -101,9 +101,6 @@ class SimpleRenameMap */ void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); - void squash(std::vector<RegIndex> freed_regs, - std::vector<UnmapInfo> unmaps); - int numFreeEntries(); private: @@ -153,7 +150,7 @@ class SimpleRenameMap }; //Change this to private - public: + private: /** Integer rename map. */ std::vector<RenameEntry> intRenameMap; diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh index 0748850ea..e05eebe5a 100644 --- a/cpu/o3/rob.hh +++ b/cpu/o3/rob.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -53,9 +53,7 @@ class ROB enum Status { Running, Idle, - ROBSquashing, - DcacheMissStall, - DcacheMissComplete + ROBSquashing }; /** SMT ROB Sharing Policy */ @@ -112,7 +110,7 @@ class ROB * no guarantee as to the return value if the ROB is empty. * @retval Pointer to the DynInst that is at the head of the ROB. */ - DynInstPtr readHeadInst(); +// DynInstPtr readHeadInst(); /** Returns a pointer to the head instruction of a specific thread within * the ROB. @@ -124,7 +122,7 @@ class ROB * no guarantee as to the return value if the ROB is empty. * @retval Pointer to the DynInst that is at the tail of the ROB. */ - DynInstPtr readTailInst(); +// DynInstPtr readTailInst(); /** Returns a pointer to the tail instruction of a specific thread within * the ROB. @@ -133,7 +131,7 @@ class ROB DynInstPtr readTailInst(unsigned tid); /** Retires the head instruction, removing it from the ROB. */ - void retireHead(); +// void retireHead(); /** Retires the head instruction of a specific thread, removing it from the * ROB. @@ -141,7 +139,7 @@ class ROB void retireHead(unsigned tid); /** Is the oldest instruction across all threads ready. */ - bool isHeadReady(); +// bool isHeadReady(); /** Is the oldest instruction across a particular thread ready. */ bool isHeadReady(unsigned tid); @@ -200,35 +198,35 @@ class ROB void updateTail(); /** Reads the PC of the oldest head instruction. */ - uint64_t readHeadPC(); +// uint64_t readHeadPC(); /** Reads the PC of the head instruction of a specific thread. */ - uint64_t readHeadPC(unsigned tid); +// uint64_t readHeadPC(unsigned tid); /** Reads the next PC of the oldest head instruction. */ - uint64_t readHeadNextPC(); +// uint64_t readHeadNextPC(); /** Reads the next PC of the head instruction of a specific thread. */ - uint64_t readHeadNextPC(unsigned tid); +// uint64_t readHeadNextPC(unsigned tid); /** Reads the sequence number of the oldest head instruction. 
*/ - InstSeqNum readHeadSeqNum(); +// InstSeqNum readHeadSeqNum(); /** Reads the sequence number of the head instruction of a specific thread. */ - InstSeqNum readHeadSeqNum(unsigned tid); +// InstSeqNum readHeadSeqNum(unsigned tid); /** Reads the PC of the youngest tail instruction. */ - uint64_t readTailPC(); +// uint64_t readTailPC(); /** Reads the PC of the tail instruction of a specific thread. */ - uint64_t readTailPC(unsigned tid); +// uint64_t readTailPC(unsigned tid); /** Reads the sequence number of the youngest tail instruction. */ - InstSeqNum readTailSeqNum(); +// InstSeqNum readTailSeqNum(); /** Reads the sequence number of tail instruction of a specific thread. */ - InstSeqNum readTailSeqNum(unsigned tid); +// InstSeqNum readTailSeqNum(unsigned tid); /** Checks if the ROB is still in the process of squashing instructions. * @retval Whether or not the ROB is done squashing. diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh index 02a4bfbee..25e0c80fd 100644 --- a/cpu/o3/rob_impl.hh +++ b/cpu/o3/rob_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -201,20 +201,15 @@ template <class Impl> void ROB<Impl>::insertInst(DynInstPtr &inst) { - // Make sure we have the right number of instructions. //assert(numInstsInROB == countInsts()); - - // Make sure the instruction is valid. assert(inst); DPRINTF(ROB, "Adding inst PC %#x to the ROB.\n", inst->readPC()); - // If the ROB is full then exit. 
assert(numInstsInROB != numEntries); int tid = inst->threadNumber; - // Place into ROB instList[tid].push_back(inst); //Set Up head iterator if this is the 1st instruction in the ROB @@ -228,10 +223,8 @@ ROB<Impl>::insertInst(DynInstPtr &inst) tail = instList[tid].end(); tail--; - // Mark as set in ROB inst->setInROB(); - // Increment ROB count ++numInstsInROB; ++threadEntries[tid]; @@ -242,6 +235,7 @@ ROB<Impl>::insertInst(DynInstPtr &inst) // Whatever calls this function needs to ensure that it properly frees up // registers prior to this function. +/* template <class Impl> void ROB<Impl>::retireHead() @@ -249,7 +243,6 @@ ROB<Impl>::retireHead() //assert(numInstsInROB == countInsts()); assert(numInstsInROB > 0); - // Get the head ROB instruction's TID. int tid = (*head)->threadNumber; retireHead(tid); @@ -258,6 +251,7 @@ ROB<Impl>::retireHead() tail = instList[tid].end(); } } +*/ template <class Impl> void @@ -271,18 +265,15 @@ ROB<Impl>::retireHead(unsigned tid) DynInstPtr head_inst = (*head_it); - // Make certain this can retire. assert(head_inst->readyToCommit()); DPRINTF(ROB, "[tid:%u]: Retiring head instruction, " "instruction PC %#x,[sn:%lli]\n", tid, head_inst->readPC(), head_inst->seqNum); - // Keep track of how many instructions are in the ROB. --numInstsInROB; --threadEntries[tid]; - //Mark DynInstFlags head_inst->removeInROB(); head_inst->setCommitted(); @@ -291,12 +282,12 @@ ROB<Impl>::retireHead(unsigned tid) //Update "Global" Head of ROB updateHead(); - // A special case is needed if the instruction being retired is the - // only instruction in the ROB; otherwise the tail iterator will become - // invalidated. + // @todo: A special case is needed if the instruction being + // retired is the only instruction in the ROB; otherwise the tail + // iterator will become invalidated. 
cpu->removeFrontInst(head_inst); } - +/* template <class Impl> bool ROB<Impl>::isHeadReady() @@ -307,7 +298,7 @@ ROB<Impl>::isHeadReady() return false; } - +*/ template <class Impl> bool ROB<Impl>::isHeadReady(unsigned tid) @@ -537,7 +528,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doSquash(tid); } } - +/* template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst() @@ -549,7 +540,7 @@ ROB<Impl>::readHeadInst() return dummyInst; } } - +*/ template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst(unsigned tid) @@ -564,7 +555,7 @@ ROB<Impl>::readHeadInst(unsigned tid) return dummyInst; } } - +/* template <class Impl> uint64_t ROB<Impl>::readHeadPC() @@ -608,7 +599,6 @@ ROB<Impl>::readHeadNextPC(unsigned tid) return (*head_thread)->readNextPC(); } - template <class Impl> InstSeqNum ROB<Impl>::readHeadSeqNum() @@ -637,7 +627,7 @@ ROB<Impl>::readTailInst() return (*tail); } - +*/ template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readTailInst(unsigned tid) @@ -650,7 +640,7 @@ ROB<Impl>::readTailInst(unsigned tid) return *tail_thread; } - +/* template <class Impl> uint64_t ROB<Impl>::readTailPC() @@ -698,4 +688,4 @@ ROB<Impl>::readTailSeqNum(unsigned tid) return (*tail_thread)->seqNum; } - +*/ diff --git a/cpu/o3/scoreboard.cc b/cpu/o3/scoreboard.cc index 87b0aee94..b0e433620 100644 --- a/cpu/o3/scoreboard.cc +++ b/cpu/o3/scoreboard.cc @@ -99,6 +99,7 @@ Scoreboard::unsetReg(PhysRegIndex ready_reg) if (ready_reg == zeroRegIdx || ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { // Don't do anything if int or fp zero reg. + return; } regScoreBoard[ready_reg] = 0; diff --git a/cpu/o3/store_set.cc b/cpu/o3/store_set.cc index a685646f3..0c957c8c7 100644 --- a/cpu/o3/store_set.cc +++ b/cpu/o3/store_set.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -278,11 +278,6 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) void StoreSet::squash(InstSeqNum squashed_num, unsigned tid) { - // Not really sure how to do this well. - // Generally this is small enough that it should be okay; short circuit - // evaluation should take care of invalid entries. - // Maybe keep a list of valid LFST's? Really ugly either way... - DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n", squashed_num); diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh index 17719bdeb..2c9788e4b 100644 --- a/cpu/o3/thread_state.hh +++ b/cpu/o3/thread_state.hh @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_O3_THREAD_STATE_HH__ #define __CPU_O3_THREAD_STATE_HH__ @@ -15,27 +42,17 @@ class EndQuiesceEvent; class FunctionProfile; class ProfileNode; #else -class Process; class FunctionalMemory; +class Process; #endif -// In the new CPU case this may be quite small...It depends on what I define -// ThreadState to be. Currently it's only the state that exists within -// ExecContext basically. Leaves the interface and manipulation up to the -// CPU. Not sure this is useful/flexible...probably can be if I can avoid -// including state here that parts of the pipeline can't modify directly, -// or at least don't let them. The only problem is for state that's needed -// per thread, per structure. I.e. rename table, memreqs. -// On the other hand, it might be nice to not have to pay the extra pointer -// lookup to get frequently used state such as a memreq (that isn't used much -// elsewhere)... - -// Maybe this ozone thread state should only really have committed state? -// I need to think about why I'm using this and what it's useful for. Clearly -// has benefits for SMT; basically serves same use as CPUExecContext. -// Makes the ExecContext proxy easier. Gives organization/central access point -// to state of a thread that can be accessed normally (i.e. not in-flight -// stuff within a OoO processor). Does this need an XC proxy within it? 
+/** + * Class that has various thread state, such as the status, the + * current instruction being processed, whether or not the thread has + * a trap pending or is being externally updated, the ExecContext + * proxy pointer, etc. It also handles anything related to a specific + * thread's process, such as syscalls and checking valid addresses. + */ template <class Impl> struct O3ThreadState : public ThreadState { typedef ExecContext::Status Status; @@ -43,7 +60,7 @@ struct O3ThreadState : public ThreadState { Status _status; - // Current instruction? + // Current instruction TheISA::MachInst inst; private: FullCPU *cpu; @@ -80,51 +97,11 @@ struct O3ThreadState : public ThreadState { void setStatus(Status new_status) { _status = new_status; } #if !FULL_SYSTEM - - Fault dummyTranslation(MemReqPtr &req) - { -#if 0 - assert((req->vaddr >> 48 & 0xffff) == 0); -#endif - - // put the asid in the upper 16 bits of the paddr - req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); - req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; - return NoFault; - } - Fault translateInstReq(MemReqPtr &req) - { - return dummyTranslation(req); - } - Fault translateDataReadReq(MemReqPtr &req) - { - return dummyTranslation(req); - } - Fault translateDataWriteReq(MemReqPtr &req) - { - return dummyTranslation(req); - } - bool validInstAddr(Addr addr) { return process->validInstAddr(addr); } bool validDataAddr(Addr addr) { return process->validDataAddr(addr); } -#else - Fault translateInstReq(MemReqPtr &req) - { - return cpu->itb->translate(req); - } - - Fault translateDataReadReq(MemReqPtr &req) - { - return cpu->dtb->translate(req, false); - } - - Fault translateDataWriteReq(MemReqPtr &req) - { - return cpu->dtb->translate(req, true); - } #endif bool misspeculating() { return false; } |