summaryrefslogtreecommitdiff
path: root/cpu
diff options
context:
space:
mode:
authorKevin Lim <ktlim@umich.edu>2006-05-19 15:53:17 -0400
committerKevin Lim <ktlim@umich.edu>2006-05-19 15:53:17 -0400
commite3d5588ca70c88318c1e41e438102034c92c561e (patch)
tree75886333b22b474893e57254fa00df7a4055df68 /cpu
parent1a6f21b8d23494752cdc9d3a8d1c1a2adfd85ccf (diff)
downloadgem5-e3d5588ca70c88318c1e41e438102034c92c561e.tar.xz
O3 code update/cleanup.
cpu/o3/commit_impl.hh: O3 code update/cleanup. Fetch fault code no longer needed (see previous checkin). --HG-- extra : convert_revision : f602e7f978e19b8900dce482f38f9c7a195e94da
Diffstat (limited to 'cpu')
-rw-r--r--cpu/o3/2bit_local_pred.cc2
-rw-r--r--cpu/o3/2bit_local_pred.hh2
-rw-r--r--cpu/o3/alpha_cpu.hh18
-rw-r--r--cpu/o3/bpred_unit.cc2
-rw-r--r--cpu/o3/bpred_unit.hh7
-rw-r--r--cpu/o3/bpred_unit_impl.hh6
-rw-r--r--cpu/o3/comm.hh6
-rw-r--r--cpu/o3/commit.hh5
-rw-r--r--cpu/o3/commit_impl.hh59
-rw-r--r--cpu/o3/decode.hh12
-rw-r--r--cpu/o3/decode_impl.hh14
-rw-r--r--cpu/o3/fetch.hh31
-rw-r--r--cpu/o3/fetch_impl.hh29
-rw-r--r--cpu/o3/lsq.hh65
-rw-r--r--cpu/o3/lsq_impl.hh138
-rw-r--r--cpu/o3/lsq_unit.hh218
-rw-r--r--cpu/o3/lsq_unit_impl.hh317
-rw-r--r--cpu/o3/mem_dep_unit.hh9
-rw-r--r--cpu/o3/mem_dep_unit_impl.hh20
-rw-r--r--cpu/o3/rename.hh32
-rw-r--r--cpu/o3/rename_impl.hh35
-rw-r--r--cpu/o3/rename_map.cc81
-rw-r--r--cpu/o3/rename_map.hh5
-rw-r--r--cpu/o3/rob.hh34
-rw-r--r--cpu/o3/rob_impl.hh38
-rw-r--r--cpu/o3/scoreboard.cc1
-rw-r--r--cpu/o3/store_set.cc7
-rw-r--r--cpu/o3/thread_state.hh95
28 files changed, 381 insertions, 907 deletions
diff --git a/cpu/o3/2bit_local_pred.cc b/cpu/o3/2bit_local_pred.cc
index eab98531d..c3fb2fdb8 100644
--- a/cpu/o3/2bit_local_pred.cc
+++ b/cpu/o3/2bit_local_pred.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/cpu/o3/2bit_local_pred.hh b/cpu/o3/2bit_local_pred.hh
index 0dfe53819..cd65978ca 100644
--- a/cpu/o3/2bit_local_pred.hh
+++ b/cpu/o3/2bit_local_pred.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh
index f70793aaa..78ad5f7d8 100644
--- a/cpu/o3/alpha_cpu.hh
+++ b/cpu/o3/alpha_cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -87,7 +87,8 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual Status status() const { return thread->status(); }
- virtual void setStatus(Status new_status) { thread->setStatus(new_status); }
+ virtual void setStatus(Status new_status)
+ { thread->setStatus(new_status); }
/// Set the status to Active. Optional delay indicates number of
/// cycles to wait before beginning execution.
@@ -168,12 +169,15 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
// @todo: Figure out where these store cond failures should go.
- virtual unsigned readStCondFailures() { return thread->storeCondFailures; }
+ virtual unsigned readStCondFailures()
+ { return thread->storeCondFailures; }
- virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; }
+ virtual void setStCondFailures(unsigned sc_failures)
+ { thread->storeCondFailures = sc_failures; }
#if FULL_SYSTEM
- virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
+ virtual bool inPalMode()
+ { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
#endif
// Only really makes sense for old CPU model. Lots of code
@@ -194,10 +198,6 @@ class AlphaFullCPU : public FullO3CPU<Impl>
#endif
};
-// friend class AlphaXC;
-
-// std::vector<ExecContext *> xcProxies;
-
#if FULL_SYSTEM
/** ITB pointer. */
AlphaITB *itb;
diff --git a/cpu/o3/bpred_unit.cc b/cpu/o3/bpred_unit.cc
index a78dcf463..92344111f 100644
--- a/cpu/o3/bpred_unit.cc
+++ b/cpu/o3/bpred_unit.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/cpu/o3/bpred_unit.hh b/cpu/o3/bpred_unit.hh
index ee7ffc183..b7814b2e9 100644
--- a/cpu/o3/bpred_unit.hh
+++ b/cpu/o3/bpred_unit.hh
@@ -43,12 +43,7 @@
/**
* Basically a wrapper class to hold both the branch predictor
- * and the BTB. Right now I'm unsure of the implementation; it would
- * be nicer to have something closer to the CPUPolicy or the Impl where
- * this is just typedefs, but it forces the upper level stages to be
- * aware of the constructors of the BP and the BTB. The nicer thing
- * to do is have this templated on the Impl, accept the usual Params
- * object, and be able to call the constructors on the BP and BTB.
+ * and the BTB.
*/
template<class Impl>
class TwobitBPredUnit
diff --git a/cpu/o3/bpred_unit_impl.hh b/cpu/o3/bpred_unit_impl.hh
index d20b31e55..c37df606b 100644
--- a/cpu/o3/bpred_unit_impl.hh
+++ b/cpu/o3/bpred_unit_impl.hh
@@ -26,13 +26,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <list>
+#include <vector>
+
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/o3/bpred_unit.hh"
-#include <vector>
-#include <list>
-
using namespace std;
template<class Impl>
diff --git a/cpu/o3/comm.hh b/cpu/o3/comm.hh
index 1a8f394ca..c36c58d3d 100644
--- a/cpu/o3/comm.hh
+++ b/cpu/o3/comm.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -169,10 +169,6 @@ struct TimeBufStruct {
bool commitInsts;
InstSeqNum squashSeqNum;
- // Extra bit of information so that the LDSTQ only updates when it
- // needs to.
- bool commitIsLoad;
-
// Communication specifically to the IQ to tell the IQ that it can
// schedule a non-speculative instruction.
InstSeqNum nonSpecSeqNum;
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh
index 73eccd2b0..66abf8dc6 100644
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -30,10 +30,10 @@
#define __CPU_O3_COMMIT_HH__
#include "arch/faults.hh"
-#include "cpu/inst_seq.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/exetrace.hh"
+#include "cpu/inst_seq.hh"
#include "mem/memory_interface.hh"
template <class>
@@ -59,8 +59,7 @@ class O3ThreadState;
* squashing instruction's sequence number, and only broadcasting a
* redirect if it corresponds to an older instruction. Commit also
* supports multiple cycle squashing, to model a ROB that can only
- * remove a certain number of instructions per cycle. Eventually traps
- * and interrupts will most likely be handled here as well.
+ * remove a certain number of instructions per cycle.
*/
template<class Impl>
class DefaultCommit
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh
index 170f5b01f..346a8bc1c 100644
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -27,12 +27,7 @@
*/
#include <algorithm>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <iomanip>
-#include <stdio.h>
-#include <string.h>
+#include <string>
#include "base/loader/symtab.hh"
#include "base/timebuf.hh"
@@ -835,58 +830,6 @@ DefaultCommit<Impl>::commitInsts()
unsigned num_committed = 0;
DynInstPtr head_inst;
-#if FULL_SYSTEM
- // Not the best way to check if the front end is empty, but it should
- // work.
- // @todo: Try to avoid directly accessing fetch.
- if (commitStatus[0] == FetchTrapPending && rob->isEmpty()) {
- DPRINTF(Commit, "Fault from fetch is pending.\n");
-
- fetchTrapWait++;
- if (fetchTrapWait > 10000000) {
- panic("Fetch trap has been pending for a long time!");
- }
- if (fetchFaultTick > curTick) {
- DPRINTF(Commit, "Not enough cycles since fault, fault will "
- "happen on %lli\n",
- fetchFaultTick);
- cpu->activityThisCycle();
- return;
- } else if (iewStage->hasStoresToWB()) {
- DPRINTF(Commit, "IEW still has stores to WB. Waiting until "
- "they are completed. fetchTrapWait:%i\n",
- fetchTrapWait);
- cpu->activityThisCycle();
- return;
- } else if (cpu->inPalMode(readPC())) {
- DPRINTF(Commit, "In pal mode right now. fetchTrapWait:%i\n",
- fetchTrapWait);
- return;
- } else if (fetchStage->getYoungestSN() > youngestSeqNum[0]) {
- DPRINTF(Commit, "Waiting for front end to drain. fetchTrapWait:%i\n",
- fetchTrapWait);
- return;
- }
- fetchTrapWait = 0;
- DPRINTF(Commit, "ROB is empty, handling fetch trap.\n");
-
- assert(!thread[0]->inSyscall);
-
- thread[0]->inSyscall = true;
-
- // Consider holding onto the trap and waiting until the trap event
- // happens for this to be executed.
- cpu->trap(fetchFault, 0);
-
- // Exit state update mode to avoid accidental updating.
- thread[0]->inSyscall = false;
-
- commitStatus[0] = TrapPending;
- // Set it up so that we squash next cycle
- trapSquash[0] = true;
- return;
- }
-#endif
// Commit as many instructions as possible until the commit bandwidth
// limit is reached, or it becomes impossible to commit any more.
diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh
index 3f3f68247..3035b3387 100644
--- a/cpu/o3/decode.hh
+++ b/cpu/o3/decode.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,11 +35,11 @@
#include "base/timebuf.hh"
/**
- * DefaultDecode class handles both single threaded and SMT decode. Its width is
- * specified by the parameters; each cycles it tries to decode that many
- * instructions. Because instructions are actually decoded when the StaticInst
- * is created, this stage does not do much other than check any PC-relative
- * branches.
+ * DefaultDecode class handles both single threaded and SMT
+ * decode. Its width is specified by the parameters; each cycle it
+ * tries to decode that many instructions. Because instructions are
+ * actually decoded when the StaticInst is created, this stage does
+ * not do much other than check any PC-relative branches.
*/
template<class Impl>
class DefaultDecode
diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh
index a419a8932..2ed7ec6fc 100644
--- a/cpu/o3/decode_impl.hh
+++ b/cpu/o3/decode_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,7 +39,6 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
decodeWidth(params->decodeWidth),
numThreads(params->numberOfThreads)
{
- DPRINTF(Decode, "decodeWidth=%i.\n", decodeWidth);
_status = Inactive;
for (int i = 0; i < numThreads; ++i) {
@@ -249,8 +248,6 @@ template<class Impl>
bool
DefaultDecode<Impl>::unblock(unsigned tid)
{
- DPRINTF(Decode, "[tid:%u]: Trying to unblock.\n", tid);
-
// Decode is done unblocking only if the skid buffer is empty.
if (skidBuffer[tid].empty()) {
DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
@@ -261,6 +258,8 @@ DefaultDecode<Impl>::unblock(unsigned tid)
return true;
}
+ DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
+
return false;
}
@@ -318,6 +317,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals to
// be high. This shouldn't happen in full system.
+ // @todo: Determine if this still happens.
if (toFetch->decodeBlock[tid]) {
toFetch->decodeBlock[tid] = 0;
} else {
@@ -372,7 +372,7 @@ DefaultDecode<Impl>::skidInsert(unsigned tid)
skidBuffer[tid].push(inst);
}
- // Eventually need to enforce this by not letting a thread
+ // @todo: Eventually need to enforce this by not letting a thread
// fetch past its skidbuffer
assert(skidBuffer[tid].size() <= skidBufferMax);
}
@@ -436,10 +436,10 @@ void
DefaultDecode<Impl>::sortInsts()
{
int insts_from_fetch = fromFetch->size;
-
+#ifdef DEBUG
for (int i=0; i < numThreads; i++)
assert(insts[i].empty());
-
+#endif
for (int i = 0; i < insts_from_fetch; ++i) {
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
}
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh
index b03d4afe3..3fcfdc3a1 100644
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,12 +38,12 @@
class Sampler;
/**
- * DefaultFetch class handles both single threaded and SMT fetch. Its width is
- * specified by the parameters; each cycle it tries to fetch that many
- * instructions. It supports using a branch predictor to predict direction and
- * targets.
- * It supports the idling functionalitiy of the CPU by indicating to the CPU
- * when it is active and inactive.
+ * DefaultFetch class handles both single threaded and SMT fetch. Its
+ * width is specified by the parameters; each cycle it tries to fetch
+ * that many instructions. It supports using a branch predictor to
+ * predict direction and targets.
+ * It supports the idling functionality of the CPU by indicating to
+ * the CPU when it is active and inactive.
*/
template <class Impl>
class DefaultFetch
@@ -66,8 +66,8 @@ class DefaultFetch
typedef TheISA::ExtMachInst ExtMachInst;
public:
- /** Overall fetch status. Used to determine if the CPU can deschedule itsef
- * due to a lack of activity.
+ /** Overall fetch status. Used to determine if the CPU can
+ * deschedule itself due to a lack of activity.
*/
enum FetchStatus {
Active,
@@ -174,13 +174,13 @@ class DefaultFetch
void wakeFromQuiesce();
private:
- /** Changes the status of this stage to active, and indicates this to the
- * CPU.
+ /** Changes the status of this stage to active, and indicates this
+ * to the CPU.
*/
inline void switchToActive();
- /** Changes the status of this stage to inactive, and indicates this to the
- * CPU.
+ /** Changes the status of this stage to inactive, and indicates
+ * this to the CPU.
*/
inline void switchToInactive();
@@ -373,11 +373,6 @@ class DefaultFetch
bool switchedOut;
- public:
- InstSeqNum &getYoungestSN() { return youngestSN; }
- private:
- InstSeqNum youngestSN;
-
#if !FULL_SYSTEM
/** Page table pointer. */
// PageTable *pTable;
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index 523719945..1c5e508f6 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -938,10 +938,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
"decode.\n",tid);
- //////////////////////////
- // Fetch first instruction
- //////////////////////////
-
// Need to keep track of whether or not a predicted branch
// ended this fetch block.
bool predicted_branch = false;
@@ -1004,7 +1000,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
- warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
+ warn("%lli: Quiesce instruction encountered, halting fetch!",
+ curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1022,24 +1019,20 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Now that fetching is completed, update the PC to signify what the next
// cycle will be.
if (fault == NoFault) {
-
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
-
PC[tid] = next_PC;
nextPC[tid] = next_PC + instSize;
} else {
- // If the issue was an icache miss, then we can just return and
- // wait until it is handled.
+ // We shouldn't be in an icache miss and also have a fault (an ITB
+ // miss)
if (fetchStatus[tid] == IcacheMissStall) {
panic("Fetch should have exited prior to this!");
}
- // Handle the fault.
- // This stage will not be able to continue until all the ROB
- // slots are empty, at which point the fault can be handled.
- // The only other way it can wake up is if a squash comes along
- // and changes the PC.
+ // Send the fault to commit. This thread will not do anything
+ // until commit handles the fault. The only other way it can
+ // wake up is if a squash comes along and changes the PC.
#if FULL_SYSTEM
assert(numInst != fetchWidth);
// Get a sequence number.
@@ -1067,20 +1060,12 @@ DefaultFetch<Impl>::fetch(bool &status_change)
toDecode->insts[numInst] = instruction;
toDecode->size++;
- // Tell the commit stage the fault we had.
-// toDecode->fetchFault = fault;
-// toDecode->fetchFaultSN = cpu->globalSeqNum;
-
DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
fetchStatus[tid] = TrapPending;
status_change = true;
warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
-// cpu->trap(fault);
- // Send a signal to the ROB indicating that there's a trap from the
- // fetch stage that needs to be handled. Need to indicate that
- // there's a fault, and the fault type.
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
#endif // FULL_SYSTEM
diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh
index d5f893e57..a1eeccbe7 100644
--- a/cpu/o3/lsq.hh
+++ b/cpu/o3/lsq.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,10 +32,9 @@
#include <map>
#include <queue>
-#include "base/hashmap.hh"
#include "config/full_system.hh"
#include "cpu/inst_seq.hh"
-#include "cpu/o3/cpu_policy.hh"
+//#include "cpu/o3/cpu_policy.hh"
#include "cpu/o3/lsq_unit.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
@@ -85,7 +84,8 @@ class LSQ {
/** Ticks the LSQ. */
void tick();
/** Ticks a specific LSQ Unit. */
- void tick(unsigned tid);
+ void tick(unsigned tid)
+ { thread[tid].tick(); }
/** Inserts a load into the LSQ. */
void insertLoad(DynInstPtr &load_inst);
@@ -95,18 +95,23 @@ class LSQ {
/** Executes a load. */
Fault executeLoad(DynInstPtr &inst);
- Fault executeLoad(int lq_idx, unsigned tid);
+ Fault executeLoad(int lq_idx, unsigned tid)
+ { return thread[tid].executeLoad(lq_idx); }
+
/** Executes a store. */
Fault executeStore(DynInstPtr &inst);
/**
* Commits loads up until the given sequence number for a specific thread.
*/
- void commitLoads(InstSeqNum &youngest_inst, unsigned tid);
+ void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
+ { thread[tid].commitLoads(youngest_inst); }
+
/**
* Commits stores up until the given sequence number for a specific thread.
*/
- void commitStores(InstSeqNum &youngest_inst, unsigned tid);
+ void commitStores(InstSeqNum &youngest_inst, unsigned tid)
+ { thread[tid].commitStores(youngest_inst); }
/**
* Attempts to write back stores until all cache ports are used or the
@@ -119,7 +124,8 @@ class LSQ {
/**
* Squash instructions from a thread until the specified sequence number.
*/
- void squash(const InstSeqNum &squashed_num, unsigned tid);
+ void squash(const InstSeqNum &squashed_num, unsigned tid)
+ { thread[tid].squash(squashed_num); }
/** Returns whether or not there was a memory ordering violation. */
bool violation();
@@ -127,12 +133,14 @@ class LSQ {
* Returns whether or not there was a memory ordering violation for a
* specific thread.
*/
- bool violation(unsigned tid);
+ bool violation(unsigned tid)
+ { return thread[tid].violation(); }
/** Returns if a load is blocked due to the memory system for a specific
* thread.
*/
- bool loadBlocked(unsigned tid);
+ bool loadBlocked(unsigned tid)
+ { return thread[tid].loadBlocked(); }
bool isLoadBlockedHandled(unsigned tid)
{ return thread[tid].isLoadBlockedHandled(); }
@@ -141,10 +149,13 @@ class LSQ {
{ thread[tid].setLoadBlockedHandled(); }
/** Gets the instruction that caused the memory ordering violation. */
- DynInstPtr getMemDepViolator(unsigned tid);
+ DynInstPtr getMemDepViolator(unsigned tid)
+ { return thread[tid].getMemDepViolator(); }
/** Returns the head index of the load queue for a specific thread. */
- int getLoadHead(unsigned tid);
+ int getLoadHead(unsigned tid)
+ { return thread[tid].getLoadHead(); }
+
/** Returns the sequence number of the head of the load queue. */
InstSeqNum getLoadHeadSeqNum(unsigned tid)
{
@@ -152,7 +163,9 @@ class LSQ {
}
/** Returns the head index of the store queue. */
- int getStoreHead(unsigned tid);
+ int getStoreHead(unsigned tid)
+ { return thread[tid].getStoreHead(); }
+
/** Returns the sequence number of the head of the store queue. */
InstSeqNum getStoreHeadSeqNum(unsigned tid)
{
@@ -162,22 +175,26 @@ class LSQ {
/** Returns the number of instructions in all of the queues. */
int getCount();
/** Returns the number of instructions in the queues of one thread. */
- int getCount(unsigned tid);
+ int getCount(unsigned tid)
+ { return thread[tid].getCount(); }
/** Returns the total number of loads in the load queue. */
int numLoads();
/** Returns the total number of loads for a single thread. */
- int numLoads(unsigned tid);
+ int numLoads(unsigned tid)
+ { return thread[tid].numLoads(); }
/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
- int numStores(unsigned tid);
+ int numStores(unsigned tid)
+ { return thread[tid].numStores(); }
/** Returns the total number of loads that are ready. */
int numLoadsReady();
/** Returns the number of loads that are ready for a single thread. */
- int numLoadsReady(unsigned tid);
+ int numLoadsReady(unsigned tid)
+ { return thread[tid].numLoadsReady(); }
/** Returns the number of free entries. */
unsigned numFreeEntries();
@@ -215,24 +232,30 @@ class LSQ {
/** Returns whether or not there are any stores to write back to memory. */
bool hasStoresToWB();
+
/** Returns whether or not a specific thread has any stores to write back
* to memory.
*/
- bool hasStoresToWB(unsigned tid);
+ bool hasStoresToWB(unsigned tid)
+ { return thread[tid].hasStoresToWB(); }
+
/** Returns the number of stores a specific thread has to write back. */
- int numStoresToWB(unsigned tid);
+ int numStoresToWB(unsigned tid)
+ { return thread[tid].numStoresToWB(); }
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
* cycle.
*/
- bool willWB(unsigned tid);
+ bool willWB(unsigned tid)
+ { return thread[tid].willWB(); }
/** Debugging function to print out all instructions. */
void dumpInsts();
/** Debugging function to print out instructions from a specific thread. */
- void dumpInsts(unsigned tid);
+ void dumpInsts(unsigned tid)
+ { thread[tid].dumpInsts(); }
/** Executes a read operation, using the load specified at the load index. */
template <class T>
diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh
index c43c19619..a6ad27522 100644
--- a/cpu/o3/lsq_impl.hh
+++ b/cpu/o3/lsq_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,6 +26,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <algorithm>
+#include <string>
+
#include "cpu/o3/lsq.hh"
using namespace std;
@@ -89,7 +92,7 @@ LSQ<Impl>::LSQ(Params *params)
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
- thread[tid].init(params, maxLQEntries+1, maxSQEntries+1, tid);
+ thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
}
}
@@ -228,13 +231,6 @@ LSQ<Impl>::tick()
template<class Impl>
void
-LSQ<Impl>::tick(unsigned tid)
-{
- thread[tid].tick();
-}
-
-template<class Impl>
-void
LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
{
unsigned tid = load_inst->threadNumber;
@@ -262,13 +258,6 @@ LSQ<Impl>::executeLoad(DynInstPtr &inst)
template<class Impl>
Fault
-LSQ<Impl>::executeLoad(int lq_idx, unsigned tid)
-{
- return thread[tid].executeLoad(lq_idx);
-}
-
-template<class Impl>
-Fault
LSQ<Impl>::executeStore(DynInstPtr &inst)
{
unsigned tid = inst->threadNumber;
@@ -278,20 +267,6 @@ LSQ<Impl>::executeStore(DynInstPtr &inst)
template<class Impl>
void
-LSQ<Impl>::commitLoads(InstSeqNum &youngest_inst,unsigned tid)
-{
- thread[tid].commitLoads(youngest_inst);
-}
-
-template<class Impl>
-void
-LSQ<Impl>::commitStores(InstSeqNum &youngest_inst,unsigned tid)
-{
- thread[tid].commitStores(youngest_inst);
-}
-
-template<class Impl>
-void
LSQ<Impl>::writebackStores()
{
list<unsigned>::iterator active_threads = (*activeThreads).begin();
@@ -300,8 +275,8 @@ LSQ<Impl>::writebackStores()
unsigned tid = *active_threads++;
if (numStoresToWB(tid) > 0) {
- DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores available"
- " for Writeback.\n", tid, numStoresToWB(tid));
+ DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
+ "available for Writeback.\n", tid, numStoresToWB(tid));
}
thread[tid].writebackStores();
@@ -309,20 +284,6 @@ LSQ<Impl>::writebackStores()
}
template<class Impl>
-int
-LSQ<Impl>::numStoresToWB(unsigned tid)
-{
- return thread[tid].numStoresToWB();
-}
-
-template<class Impl>
-void
-LSQ<Impl>::squash(const InstSeqNum &squashed_num, unsigned tid)
-{
- thread[tid].squash(squashed_num);
-}
-
-template<class Impl>
bool
LSQ<Impl>::violation()
{
@@ -339,41 +300,6 @@ LSQ<Impl>::violation()
}
template<class Impl>
-bool
-LSQ<Impl>::violation(unsigned tid)
-{
- return thread[tid].violation();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::loadBlocked(unsigned tid)
-{
- return thread[tid].loadBlocked();
-}
-
-template<class Impl>
-typename Impl::DynInstPtr
-LSQ<Impl>::getMemDepViolator(unsigned tid)
-{
- return thread[tid].getMemDepViolator();
-}
-
-template<class Impl>
-int
-LSQ<Impl>::getLoadHead(unsigned tid)
-{
- return thread[tid].getLoadHead();
-}
-
-template<class Impl>
-int
-LSQ<Impl>::getStoreHead(unsigned tid)
-{
- return thread[tid].getStoreHead();
-}
-
-template<class Impl>
int
LSQ<Impl>::getCount()
{
@@ -391,13 +317,6 @@ LSQ<Impl>::getCount()
template<class Impl>
int
-LSQ<Impl>::getCount(unsigned tid)
-{
- return thread[tid].getCount();
-}
-
-template<class Impl>
-int
LSQ<Impl>::numLoads()
{
unsigned total = 0;
@@ -414,13 +333,6 @@ LSQ<Impl>::numLoads()
template<class Impl>
int
-LSQ<Impl>::numLoads(unsigned tid)
-{
- return thread[tid].numLoads();
-}
-
-template<class Impl>
-int
LSQ<Impl>::numStores()
{
unsigned total = 0;
@@ -437,13 +349,6 @@ LSQ<Impl>::numStores()
template<class Impl>
int
-LSQ<Impl>::numStores(unsigned tid)
-{
- return thread[tid].numStores();
-}
-
-template<class Impl>
-int
LSQ<Impl>::numLoadsReady()
{
unsigned total = 0;
@@ -459,13 +364,6 @@ LSQ<Impl>::numLoadsReady()
}
template<class Impl>
-int
-LSQ<Impl>::numLoadsReady(unsigned tid)
-{
- return thread[tid].numLoadsReady();
-}
-
-template<class Impl>
unsigned
LSQ<Impl>::numFreeEntries()
{
@@ -612,14 +510,6 @@ LSQ<Impl>::hasStoresToWB()
return true;
}
-
-template<class Impl>
-bool
-LSQ<Impl>::hasStoresToWB(unsigned tid)
-{
- return thread[tid].hasStoresToWB();
-}
-
template<class Impl>
bool
LSQ<Impl>::willWB()
@@ -636,13 +526,6 @@ LSQ<Impl>::willWB()
}
template<class Impl>
-bool
-LSQ<Impl>::willWB(unsigned tid)
-{
- return thread[tid].willWB();
-}
-
-template<class Impl>
void
LSQ<Impl>::dumpInsts()
{
@@ -653,10 +536,3 @@ LSQ<Impl>::dumpInsts()
thread[tid].dumpInsts();
}
}
-
-template<class Impl>
-void
-LSQ<Impl>::dumpInsts(unsigned tid)
-{
- thread[tid].dumpInsts();
-}
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh
index 623dbdb4b..942b4583d 100644
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,29 +29,30 @@
#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__
+#include <algorithm>
#include <map>
#include <queue>
-#include <algorithm>
+#include "arch/faults.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
-#include "sim/debug.hh"
-#include "sim/sim_object.hh"
-#include "arch/faults.hh"
+//#include "sim/debug.hh"
+//#include "sim/sim_object.hh"
/**
- * Class that implements the actual LQ and SQ for each specific thread.
- * Both are circular queues; load entries are freed upon committing, while
- * store entries are freed once they writeback. The LSQUnit tracks if there
- * are memory ordering violations, and also detects partial load to store
- * forwarding cases (a store only has part of a load's data) that requires
- * the load to wait until the store writes back. In the former case it
- * holds onto the instruction until the dependence unit looks at it, and
- * in the latter it stalls the LSQ until the store writes back. At that
- * point the load is replayed.
+ * Class that implements the actual LQ and SQ for each specific
+ * thread. Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they writeback. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that requires the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
*/
template <class Impl>
class LSQUnit {
@@ -76,21 +77,19 @@ class LSQUnit {
/** Returns the description of this event. */
const char *description();
- private:
- /** The store index of the store being written back. */
- int storeIdx;
/** The writeback event for the store. Needed for store
* conditionals.
*/
- public:
Event *wbEvent;
+
+ private:
+ /** The store index of the store being written back. */
+ int storeIdx;
private:
/** The pointer to the LSQ unit that issued the store. */
LSQUnit<Impl> *lsqPtr;
};
- friend class StoreCompletionEvent;
-
public:
/** Constructs an LSQ unit. init() must be called prior to use. */
LSQUnit();
@@ -136,14 +135,12 @@ class LSQUnit {
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
- Fault executeLoad(int lq_idx);
+ Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
/** Executes a store instruction. */
Fault executeStore(DynInstPtr &inst);
/** Commits the head load. */
void commitLoad();
- /** Commits a specific load, given by the sequence number. */
- void commitLoad(InstSeqNum &inst);
/** Commits loads older than a specific sequence number. */
void commitLoads(InstSeqNum &youngest_inst);
@@ -179,9 +176,7 @@ class LSQUnit {
/** Returns the memory ordering violator. */
DynInstPtr getMemDepViolator();
- /** Returns if a load became blocked due to the memory system. It clears
- * the bool's value upon this being called.
- */
+ /** Returns if a load became blocked due to the memory system. */
bool loadBlocked()
{ return isLoadBlocked; }
@@ -215,9 +210,6 @@ class LSQUnit {
/** Returns if the SQ is full. */
bool sqFull() { return stores >= (SQEntries - 1); }
- /** Debugging function to dump instructions in the LSQ. */
- void dumpInsts();
-
/** Returns the number of instructions in the LSQ. */
unsigned getCount() { return loads + stores; }
@@ -245,6 +237,10 @@ class LSQUnit {
/** Decrements the given load index (circular queue). */
inline void decrLdIdx(int &load_idx);
+ public:
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
private:
/** Pointer to the CPU. */
FullCPU *cpu;
@@ -287,38 +283,29 @@ class LSQUnit {
/** Whether or not the store is completed. */
bool completed;
};
-/*
- enum Status {
- Running,
- Idle,
- DcacheMissStall,
- DcacheMissSwitch
- };
-*/
+
private:
/** The LSQUnit thread id. */
unsigned lsqID;
- /** The status of the LSQ unit. */
-// Status _status;
-
/** The store queue. */
std::vector<SQEntry> storeQueue;
/** The load queue. */
std::vector<DynInstPtr> loadQueue;
- // Consider making these 16 bits
- /** The number of LQ entries. */
+ /** The number of LQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of LQ entries.
+ */
unsigned LQEntries;
- /** The number of SQ entries. */
+ /** The number of SQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of SQ entries.
+ */
unsigned SQEntries;
/** The number of load instructions in the LQ. */
int loads;
- /** The number of store instructions in the SQ (excludes those waiting to
- * writeback).
- */
+ /** The number of store instructions in the SQ. */
int stores;
/** The number of store instructions in the SQ waiting to writeback. */
int storesToWB;
@@ -330,8 +317,8 @@ class LSQUnit {
/** The index of the head instruction in the SQ. */
int storeHead;
- /** The index of the first instruction that is ready to be written back,
- * and has not yet been written back.
+ /** The index of the first instruction that may be ready to be
+ * written back, and has not yet been written back.
*/
int storeWBIdx;
/** The index of the tail instruction in the SQ. */
@@ -348,13 +335,9 @@ class LSQUnit {
//list<InstSeqNum> mshrSeqNums;
- //Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-
/** Wire to read information from the issue stage time queue. */
typename TimeBuffer<IssueStruct>::wire fromIssue;
- // Make these per thread?
/** Whether or not the LSQ is stalled. */
bool stalled;
/** The store that causes the stall due to partial store to load
@@ -364,20 +347,13 @@ class LSQUnit {
/** The index of the load that is stalled on the above store. */
int stallingLoadIdx;
- /** Whether or not a load is blocked due to the memory system. It is
- * cleared when this value is checked via loadBlocked().
- */
+ /** Whether or not a load is blocked due to the memory system. */
bool isLoadBlocked;
bool loadBlockedHandled;
InstSeqNum blockedLoadSeqNum;
- /** The oldest faulting load instruction. */
- DynInstPtr loadFaultInst;
- /** The oldest faulting store instruction. */
- DynInstPtr storeFaultInst;
-
/** The oldest load that caused a memory ordering violation. */
DynInstPtr memDepViolator;
@@ -447,23 +423,14 @@ template <class T>
Fault
LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
{
- //Depending on issue2execute delay a squashed load could
- //execute if it is found to be squashed in the same
- //cycle it is scheduled to execute
assert(loadQueue[load_idx]);
- if (loadQueue[load_idx]->isExecuted()) {
- panic("Should not reach this point with split ops!");
- memcpy(&data,req->data,req->size);
-
- return NoFault;
- }
+ assert(!loadQueue[load_idx]->isExecuted());
// Make sure this isn't an uncacheable access
// A bit of a hackish way to get uncached accesses to work only if they're
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
- // @todo: Fix uncached accesses.
if (req->flags & UNCACHEABLE &&
(load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
iewStage->rescheduleMemInst(loadQueue[load_idx]);
@@ -479,12 +446,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
"storeHead: %i addr: %#x\n",
load_idx, store_idx, storeHead, req->paddr);
-#ifdef FULL_SYSTEM
+#if 0
if (req->flags & LOCKED) {
cpu->lockAddr = req->paddr;
cpu->lockFlag = true;
}
#endif
+ req->cmd = Read;
+ assert(!req->completionEvent);
+ req->completionEvent = NULL;
+ req->time = curTick;
while (store_idx != -1) {
// End once we've reached the top of the LSQ
@@ -518,18 +489,14 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// If the store's data has all of the data needed, we can forward.
if (store_has_lower_limit && store_has_upper_limit) {
-
+ // Get shift amount for offset into the store's data.
int shift_amt = req->vaddr & (store_size - 1);
- // Assumes byte addressing
+ // @todo: Magic number, assumes byte addressing
shift_amt = shift_amt << 3;
// Cast this to type T?
data = storeQueue[store_idx].data >> shift_amt;
- req->cmd = Read;
- assert(!req->completionEvent);
- req->completionEvent = NULL;
- req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
@@ -579,7 +546,6 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// Do not generate a writeback event as this instruction is not
// complete.
-
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
"Store idx %i to load addr %#x\n",
store_idx, req->vaddr);
@@ -588,16 +554,13 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
}
}
-
// If there's no forwarding case, then go access memory
DynInstPtr inst = loadQueue[load_idx];
- DPRINTF(LSQUnit, "Doing functional access for inst PC %#x\n",
- loadQueue[load_idx]->readPC());
+ DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+ loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC());
+
assert(!req->data);
- req->cmd = Read;
- req->completionEvent = NULL;
- req->time = curTick;
req->data = new uint8_t[64];
Fault fault = cpu->read(req, data);
memcpy(req->data, &data, sizeof(T));
@@ -611,20 +574,19 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
return NoFault;
+ // Record that the load was blocked due to memory. This
+ // load will squash all instructions after it, be
+ // refetched, and re-executed.
isLoadBlocked = true;
loadBlockedHandled = false;
blockedLoadSeqNum = inst->seqNum;
// No fault occurred, even though the interface is blocked.
return NoFault;
}
+
DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
loadQueue[load_idx]->readPC());
-/*
- Addr debug_addr = ULL(0xfffffc0000be81a8);
- if (req->vaddr == debug_addr) {
- debug_break();
- }
-*/
+
assert(!req->completionEvent);
req->completionEvent =
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
@@ -632,75 +594,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
assert(dcacheInterface->doEvents());
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
if (result != MA_HIT) {
DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
inst->seqNum);
-
- lastDcacheStall = curTick;
-
-// _status = DcacheMissStall;
-
} else {
- DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
- inst->seqNum);
-
DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
- }
- }
-#if 0
- // if we have a cache, do cache access too
- if (dcacheInterface) {
- if (dcacheInterface->isBlocked()) {
- isLoadBlocked = true;
- // No fault occurred, even though the interface is blocked.
- return NoFault;
- }
-
- DPRINTF(LSQUnit, "LSQUnit: D-cache: PC:%#x reading from paddr:%#x "
- "vaddr:%#x flags:%i\n",
- inst->readPC(), req->paddr, req->vaddr, req->flags);
-
- // Setup MemReq pointer
- req->cmd = Read;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
-
- assert(!req->completionEvent);
- req->completionEvent =
- new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
-
- // Do Cache Access
- MemAccessResult result = dcacheInterface->access(req);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- // @todo: Probably should support having no events
- if (result != MA_HIT) {
- DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
- DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
- inst->seqNum);
-
- lastDcacheStall = curTick;
-
- _status = DcacheMissStall;
-
- } else {
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
inst->seqNum);
-
- DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
}
- } else {
- fatal("Must use D-cache with new memory system");
}
-#endif
return fault;
}
@@ -716,24 +619,11 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
" | storeHead:%i [sn:%i]\n",
store_idx, req->paddr, data, storeHead,
storeQueue[store_idx].inst->seqNum);
-/*
- if (req->flags & LOCKED) {
- if (req->flags & UNCACHEABLE) {
- req->result = 2;
- } else {
- req->result = 1;
- }
- }
-*/
+
storeQueue[store_idx].req = req;
storeQueue[store_idx].size = sizeof(T);
storeQueue[store_idx].data = data;
-/*
- Addr debug_addr = ULL(0xfffffc0000be81a8);
- if (req->vaddr == debug_addr) {
- debug_break();
- }
-*/
+
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh
index dca808ac9..f0b4405ed 100644
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -35,8 +35,8 @@ LSQUnit<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx,
Event *wb_event,
LSQUnit<Impl> *lsq_ptr)
: Event(&mainEventQueue),
- storeIdx(store_idx),
wbEvent(wb_event),
+ storeIdx(store_idx),
lsqPtr(lsq_ptr)
{
this->setFlags(Event::AutoDelete);
@@ -86,15 +86,13 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
lsqID = id;
- LQEntries = maxLQEntries;
- SQEntries = maxSQEntries;
+ // Add 1 for the sentinel entry (they are circular queues).
+ LQEntries = maxLQEntries + 1;
+ SQEntries = maxSQEntries + 1;
loadQueue.resize(LQEntries);
storeQueue.resize(SQEntries);
-
- // May want to initialize these entries to NULL
-
loadHead = loadTail = 0;
storeHead = storeWBIdx = storeTail = 0;
@@ -104,7 +102,7 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
dcacheInterface = params->dcacheInterface;
- loadFaultInst = storeFaultInst = memDepViolator = NULL;
+ memDepViolator = NULL;
blockedLoadSeqNum = 0;
}
@@ -152,6 +150,8 @@ LSQUnit<Impl>::switchOut()
for (int i = 0; i < loadQueue.size(); ++i)
loadQueue[i] = NULL;
+ assert(storesToWB == 0);
+
while (storesToWB > 0 &&
storeWBIdx != storeTail &&
storeQueue[storeWBIdx].inst &&
@@ -218,7 +218,7 @@ LSQUnit<Impl>::takeOverFrom()
usedPorts = 0;
- loadFaultInst = storeFaultInst = memDepViolator = NULL;
+ memDepViolator = NULL;
blockedLoadSeqNum = 0;
@@ -231,16 +231,17 @@ template<class Impl>
void
LSQUnit<Impl>::resizeLQ(unsigned size)
{
- assert( size >= LQEntries);
+ unsigned size_plus_sentinel = size + 1;
+ assert(size_plus_sentinel >= LQEntries);
- if (size > LQEntries) {
- while (size > loadQueue.size()) {
+ if (size_plus_sentinel > LQEntries) {
+ while (size_plus_sentinel > loadQueue.size()) {
DynInstPtr dummy;
loadQueue.push_back(dummy);
LQEntries++;
}
} else {
- LQEntries = size;
+ LQEntries = size_plus_sentinel;
}
}
@@ -249,14 +250,15 @@ template<class Impl>
void
LSQUnit<Impl>::resizeSQ(unsigned size)
{
- if (size > SQEntries) {
- while (size > storeQueue.size()) {
+ unsigned size_plus_sentinel = size + 1;
+ if (size_plus_sentinel > SQEntries) {
+ while (size_plus_sentinel > storeQueue.size()) {
SQEntry dummy;
storeQueue.push_back(dummy);
SQEntries++;
}
} else {
- SQEntries = size;
+ SQEntries = size_plus_sentinel;
}
}
@@ -264,10 +266,8 @@ template <class Impl>
void
LSQUnit<Impl>::insert(DynInstPtr &inst)
{
- // Make sure we really have a memory reference.
assert(inst->isMemRef());
- // Make sure it's one of the two classes of memory references.
assert(inst->isLoad() || inst->isStore());
if (inst->isLoad()) {
@@ -283,7 +283,8 @@ template <class Impl>
void
LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
{
- assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
+ assert((loadTail + 1) % LQEntries != loadHead);
+ assert(loads < LQEntries);
DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
load_inst->readPC(), loadTail, load_inst->seqNum);
@@ -322,7 +323,6 @@ LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
incrStIdx(storeTail);
++stores;
-
}
template <class Impl>
@@ -370,39 +370,6 @@ LSQUnit<Impl>::numLoadsReady()
return retval;
}
-#if 0
-template <class Impl>
-Fault
-LSQUnit<Impl>::executeLoad()
-{
- Fault load_fault = NoFault;
- DynInstPtr load_inst;
-
- assert(readyLoads.size() != 0);
-
- // Execute a ready load.
- LdMapIt ready_it = readyLoads.begin();
-
- load_inst = (*ready_it).second;
-
- // Execute the instruction, which is held in the data portion of the
- // iterator.
- load_fault = load_inst->execute();
-
- // If it executed successfully, then switch it over to the executed
- // loads list.
- if (load_fault == NoFault) {
- executedLoads[load_inst->seqNum] = load_inst;
-
- readyLoads.erase(ready_it);
- } else {
- loadFaultInst = load_inst;
- }
-
- return load_fault;
-}
-#endif
-
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
@@ -413,33 +380,14 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
inst->readPC(),inst->seqNum);
- // Make sure it's really in the list.
- // Normally it should always be in the list. However,
- /* due to a syscall it may not be the list.
-#ifdef DEBUG
- int i = loadHead;
- while (1) {
- if (i == loadTail && !find(inst)) {
- assert(0 && "Load not in the queue!");
- } else if (loadQueue[i] == inst) {
- break;
- }
-
- i = i + 1;
- if (i >= LQEntries) {
- i = 0;
- }
- }
-#endif // DEBUG*/
-
// load_fault = inst->initiateAcc();
load_fault = inst->execute();
// If the instruction faulted, then we need to send it along to commit
// without the instruction completing.
if (load_fault != NoFault) {
- // Maybe just set it as can commit here, although that might cause
- // some other problems with sending traps to the ROB too quickly.
+ // Send this instruction to commit, and make sure the IEW stage
+ // realizes there is activity.
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
}
@@ -449,20 +397,6 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
template <class Impl>
Fault
-LSQUnit<Impl>::executeLoad(int lq_idx)
-{
- // Very hackish. Not sure the best way to check that this
- // instruction is at the head of the ROB. I should have some sort
- // of extra information here so that I'm not overloading the
- // canCommit signal for 15 different things.
- loadQueue[lq_idx]->setCanCommit();
- Fault ret_fault = executeLoad(loadQueue[lq_idx]);
- loadQueue[lq_idx]->clearCanCommit();
- return ret_fault;
-}
-
-template <class Impl>
-Fault
LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
{
using namespace TheISA;
@@ -481,11 +415,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
Fault store_fault = store_inst->initiateAcc();
// Fault store_fault = store_inst->execute();
- // Store size should now be available. Use it to get proper offset for
- // addr comparisons.
- int size = storeQueue[store_idx].size;
-
- if (size == 0) {
+ if (storeQueue[store_idx].size == 0) {
DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
store_inst->readPC(),store_inst->seqNum);
@@ -494,30 +424,25 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
assert(store_fault == NoFault);
- if (!storeFaultInst) {
- if (store_fault != NoFault) {
- panic("Fault in a store instruction!");
- storeFaultInst = store_inst;
- } else if (store_inst->isNonSpeculative()) {
- // Nonspeculative accesses (namely store conditionals)
- // need to set themselves as able to writeback if we
- // haven't had a fault by here.
- storeQueue[store_idx].canWB = true;
+ if (store_inst->isNonSpeculative()) {
+ // Nonspeculative accesses (namely store conditionals)
+ // need to set themselves as able to writeback if we
+ // haven't had a fault by here.
+ storeQueue[store_idx].canWB = true;
- ++storesToWB;
- }
+ ++storesToWB;
}
if (!memDepViolator) {
while (load_idx != loadTail) {
- // Actually should only check loads that have actually executed
- // Might be safe because effAddr is set to InvalAddr when the
- // dyn inst is created.
-
- // Must actually check all addrs in the proper size range
- // Which is more correct than needs to be. What if for now we just
- // assume all loads are quad-word loads, and do the addr based
- // on that.
+ // Really only need to check loads that have actually executed.
+ // It's safe to check all loads because effAddr is set to
+ // InvalAddr when the dyn inst is created.
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
// @todo: Fix this, magic number being used here. Shifting by 8
// compares 256-byte blocks, which is coarser than a quad word.
if ((loadQueue[load_idx]->effAddr >> 8) ==
(store_inst->effAddr >> 8)) {
@@ -557,32 +482,6 @@ LSQUnit<Impl>::commitLoad()
template <class Impl>
void
-LSQUnit<Impl>::commitLoad(InstSeqNum &inst)
-{
- // Hopefully I don't use this function too much
- panic("Don't use this function!");
-
- int i = loadHead;
- while (1) {
- if (i == loadTail) {
- assert(0 && "Load not in the queue!");
- } else if (loadQueue[i]->seqNum == inst) {
- break;
- }
-
- ++i;
- if (i >= LQEntries) {
- i = 0;
- }
- }
-
- loadQueue[i]->removeInLSQ();
- loadQueue[i] = NULL;
- --loads;
-}
-
-template <class Impl>
-void
LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
{
assert(loads == 0 || loadQueue[loadHead]);
@@ -602,6 +501,8 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
while (store_idx != storeTail) {
assert(storeQueue[store_idx].inst);
+ // Mark any stores that are now committed and have not yet
+ // been marked as able to write back.
if (!storeQueue[store_idx].canWB) {
if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
break;
@@ -613,7 +514,6 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
storeQueue[store_idx].canWB = true;
-// --stores;
++storesToWB;
}
@@ -631,6 +531,8 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].canWB &&
usedPorts < cachePorts) {
+ // Store didn't write any data so no need to write it back to
+ // memory.
if (storeQueue[storeWBIdx].size == 0) {
completeStore(storeWBIdx);
@@ -659,7 +561,6 @@ LSQUnit<Impl>::writebackStores()
MemReqPtr req = storeQueue[storeWBIdx].req;
storeQueue[storeWBIdx].committed = true;
-// Fault fault = cpu->translateDataWriteReq(req);
req->cmd = Write;
req->completionEvent = NULL;
req->time = curTick;
@@ -689,6 +590,12 @@ LSQUnit<Impl>::writebackStores()
default:
panic("Unexpected store size!\n");
}
+
+ // Stores other than store conditionals are completed at this
+ // time. Mark them as completed and, if we have a checker,
+ // tell it that the instruction is completed.
+ // @todo: Figure out what time I can say stores are complete in
+ // the timing memory.
if (!(req->flags & LOCKED)) {
storeQueue[storeWBIdx].inst->setCompleted();
if (cpu->checker) {
@@ -714,57 +621,35 @@ LSQUnit<Impl>::writebackStores()
iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
}
- if (result != MA_HIT && dcacheInterface->doEvents()) {
- typename IEW::LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
- // Stx_C should not generate a system port transaction,
- // but that might be hard to accomplish.
- wb = new typename
- IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
+ typename IEW::LdWritebackEvent *wb = NULL;
+ if (req->flags & LOCKED) {
+ // Stx_C should not generate a system port transaction
+ // if it misses in the cache, but that might be hard
+ // to accomplish without explicit cache support.
+ wb = new typename
+ IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
iewStage);
- store_event->wbEvent = wb;
- }
+ store_event->wbEvent = wb;
+ }
- DPRINTF(LSQUnit,"D-Cache Write Miss!\n");
+ if (result != MA_HIT && dcacheInterface->doEvents()) {
+ DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
+ storeWBIdx);
DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
storeQueue[storeWBIdx].inst->seqNum);
- lastDcacheStall = curTick;
-
-// _status = DcacheMissStall;
-
//mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
//DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
- // Increment stat here or something
+ // @todo: Increment stat here.
} else {
DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
storeWBIdx);
DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
storeQueue[storeWBIdx].inst->seqNum);
-
-
- if (req->flags & LOCKED) {
- // Stx_C does not generate a system port transaction.
-/*
- if (req->flags & UNCACHEABLE) {
- req->result = 2;
- } else {
- if (cpu->lockFlag && cpu->lockAddr == req->paddr) {
- req->result=1;
- } else {
- req->result = 0;
- }
- }
-*/
- typename IEW::LdWritebackEvent *wb =
- new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
- iewStage);
- store_event->wbEvent = wb;
- }
}
incrStIdx(storeWBIdx);
@@ -798,14 +683,12 @@ void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+ "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
int load_idx = loadTail;
decrLdIdx(load_idx);
while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
-
- // Clear the smart pointer to make sure it is decremented.
DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
"[sn:%lli]\n",
loadQueue[load_idx]->readPC(),
@@ -817,6 +700,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingLoadIdx = 0;
}
+ // Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->squashed = true;
loadQueue[load_idx] = NULL;
--loads;
@@ -840,19 +724,18 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
while (stores != 0 &&
storeQueue[store_idx].inst->seqNum > squashed_num) {
-
+ // Instructions marked as can WB are already committed.
if (storeQueue[store_idx].canWB) {
break;
}
- // Clear the smart pointer to make sure it is decremented.
DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
"idx:%i [sn:%lli]\n",
storeQueue[store_idx].inst->readPC(),
store_idx, storeQueue[store_idx].inst->seqNum);
- // I don't think this can happen. It should have been cleared by the
- // stalling load.
+ // I don't think this can happen. It should have been cleared
+ // by the stalling load.
if (isStalled() &&
storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
panic("Is stalled should have been cleared by stalling load!\n");
@@ -860,13 +743,17 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingStoreIsn = 0;
}
+ // Clear the smart pointer to make sure it is decremented.
storeQueue[store_idx].inst->squashed = true;
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
if (storeQueue[store_idx].req) {
+ // There should not be a completion event if the store has
+ // not yet committed.
assert(!storeQueue[store_idx].req->completionEvent);
}
+
storeQueue[store_idx].req = NULL;
--stores;
@@ -879,36 +766,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
template <class Impl>
void
-LSQUnit<Impl>::dumpInsts()
-{
- cprintf("Load store queue: Dumping instructions.\n");
- cprintf("Load queue size: %i\n", loads);
- cprintf("Load queue: ");
-
- int load_idx = loadHead;
-
- while (load_idx != loadTail && loadQueue[load_idx]) {
- cprintf("%#x ", loadQueue[load_idx]->readPC());
-
- incrLdIdx(load_idx);
- }
-
- cprintf("Store queue size: %i\n", stores);
- cprintf("Store queue: ");
-
- int store_idx = storeHead;
-
- while (store_idx != storeTail && storeQueue[store_idx].inst) {
- cprintf("%#x ", storeQueue[store_idx].inst->readPC());
-
- incrStIdx(store_idx);
- }
-
- cprintf("\n");
-}
-
-template <class Impl>
-void
LSQUnit<Impl>::completeStore(int store_idx)
{
assert(storeQueue[store_idx].inst);
@@ -930,7 +787,9 @@ LSQUnit<Impl>::completeStore(int store_idx)
iewStage->updateLSQNextCycle = true;
}
- DPRINTF(LSQUnit, "Store head idx:%i\n", storeHead);
+ DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+ "idx:%i\n",
+ storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
if (isStalled() &&
storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
@@ -943,6 +802,10 @@ LSQUnit<Impl>::completeStore(int store_idx)
}
storeQueue[store_idx].inst->setCompleted();
+
+ // Tell the checker we've completed this instruction. Some stores
+ // may get reported twice to the checker, but the checker can
+ // handle that case.
if (cpu->checker) {
cpu->checker->tick(storeQueue[store_idx].inst);
}
@@ -979,3 +842,33 @@ LSQUnit<Impl>::decrLdIdx(int &load_idx)
if (--load_idx < 0)
load_idx += LQEntries;
}
+
+template <class Impl>
+void
+LSQUnit<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail && loadQueue[load_idx]) {
+ cprintf("%#x ", loadQueue[load_idx]->readPC());
+
+ incrLdIdx(load_idx);
+ }
+
+ cprintf("Store queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail && storeQueue[store_idx].inst) {
+ cprintf("%#x ", storeQueue[store_idx].inst->readPC());
+
+ incrStIdx(store_idx);
+ }
+
+ cprintf("\n");
+}
diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh
index 141e0fdc4..acbe08ec2 100644
--- a/cpu/o3/mem_dep_unit.hh
+++ b/cpu/o3/mem_dep_unit.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -201,13 +201,6 @@ class MemDepUnit {
static int memdep_erase;
};
- struct ltMemDepEntry {
- bool operator() (const MemDepEntryPtr &lhs, const MemDepEntryPtr &rhs)
- {
- return lhs->inst->seqNum < rhs->inst->seqNum;
- }
- };
-
/** Finds the memory dependence entry in the hash map. */
inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);
diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh
index 05a33685d..8b195baab 100644
--- a/cpu/o3/mem_dep_unit_impl.hh
+++ b/cpu/o3/mem_dep_unit_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -141,12 +141,12 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
MemDepEntry::memdep_insert++;
- // Add the instruction to the instruction list.
instList[tid].push_back(inst);
inst_entry->listIt = --(instList[tid].end());
- // Check the dependence predictor for any producing stores.
+ // Check any barriers and the dependence predictor for any
+ // producing stores.
InstSeqNum producing_store;
if (inst->isLoad() && loadBarrier) {
producing_store = loadBarrierSN;
@@ -181,7 +181,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
moveToReady(inst_entry);
}
} else {
- // Otherwise make the instruction dependent on the store.
+ // Otherwise make the instruction dependent on the store/barrier.
DPRINTF(MemDepUnit, "Adding to dependency list; "
"inst PC %#x is dependent on [sn:%lli].\n",
inst->readPC(), producing_store);
@@ -193,8 +193,6 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
// Add this instruction to the list of dependents.
store_entry->dependInsts.push_back(inst_entry);
-// inst_entry->producingStore = store_entry;
-
if (inst->isLoad()) {
++conflictingLoads;
} else {
@@ -370,8 +368,6 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
instList[tid].erase((*hash_it).second->listIt);
-// (*hash_it).second->inst = NULL;
-
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
@@ -416,7 +412,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
if (!woken_inst->inst) {
// Potentially removed mem dep entries could be on this list
-// inst_entry->dependInsts[i] = NULL;
continue;
}
@@ -429,7 +424,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
} else {
woken_inst->memDepReady = true;
}
-// inst_entry->dependInsts[i] = NULL;
}
inst_entry->dependInsts.clear();
@@ -468,13 +462,7 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
assert(hash_it != memDepHash.end());
(*hash_it).second->squashed = true;
-/*
- for (int i = 0; i < (*hash_it).second->dependInsts.size(); ++i) {
- (*hash_it).second->dependInsts[i] = NULL;
- }
- (*hash_it).second->inst = NULL;
-*/
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh
index dd2cb0c18..3f1a27bb5 100644
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,15 +35,16 @@
#include "base/timebuf.hh"
/**
- * DefaultRename handles both single threaded and SMT rename. Its width is
- * specified by the parameters; each cycle it tries to rename that many
- * instructions. It holds onto the rename history of all instructions with
- * destination registers, storing the arch. register, the new physical
- * register, and the old physical register, to allow for undoing of mappings
- * if squashing happens, or freeing up registers upon commit. Rename handles
- * blocking if the ROB, IQ, or LSQ is going to be full. Rename also handles
- * barriers, and does so by stalling on the instruction until the ROB is
- * empty and there are no instructions in flight to the ROB.
+ * DefaultRename handles both single threaded and SMT rename. Its
+ * width is specified by the parameters; each cycle it tries to rename
+ * that many instructions. It holds onto the rename history of all
+ * instructions with destination registers, storing the
+ * arch. register, the new physical register, and the old physical
+ * register, to allow for undoing of mappings if squashing happens, or
+ * freeing up registers upon commit. Rename handles blocking if the
+ * ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
+ * and does so by stalling on the instruction until the ROB is empty
+ * and there are no instructions in flight to the ROB.
*/
template<class Impl>
class DefaultRename
@@ -68,14 +69,15 @@ class DefaultRename
// Typedefs from the ISA.
typedef TheISA::RegIndex RegIndex;
- // A deque is used to queue the instructions. Barrier insts must be
- // added to the front of the deque, which is the only reason for using
- // a deque instead of a queue. (Most other stages use a queue)
+ // A list is used to queue the instructions. Barrier insts must
+ // be added to the front of the list, which is the only reason for
+ // using a list instead of a queue. (Most other stages use a
+ // queue)
typedef std::list<DynInstPtr> InstQueue;
public:
- /** Overall rename status. Used to determine if the CPU can deschedule
- * itself due to a lack of activity.
+ /** Overall rename status. Used to determine if the CPU can
+ * deschedule itself due to a lack of activity.
*/
enum RenameStatus {
Active,
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index db4bb2ffe..081581c92 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -209,17 +209,13 @@ template <class Impl>
void
DefaultRename<Impl>::initStage()
{
+ // Grab the number of free entries directly from the stages.
for (int tid=0; tid < numThreads; tid++) {
freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid);
freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
emptyROB[tid] = true;
}
-
- // Clear these pointers so they are not accidentally used in
- // non-initialization code.
-// iew_ptr = NULL;
-// commit_ptr = NULL;
}
template<class Impl>
@@ -299,6 +295,7 @@ DefaultRename<Impl>::takeOverFrom()
_status = Inactive;
initStage();
+ // Reset all state prior to taking over from the other CPU.
for (int i=0; i< numThreads; i++) {
renameStatus[i] = Idle;
@@ -326,7 +323,7 @@ DefaultRename<Impl>::squash(unsigned tid)
if (renameStatus[tid] == Blocked ||
renameStatus[tid] == Unblocking ||
renameStatus[tid] == SerializeStall) {
-#if !FULL_SYSTEM
+#if 0
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals to
// be high. This shouldn't happen in full system.
@@ -344,7 +341,7 @@ DefaultRename<Impl>::squash(unsigned tid)
// Set the status to Squashing.
renameStatus[tid] = Squashing;
- // Clear the skid buffer in case it has any data in it.
+ // Squash any instructions from decode.
unsigned squashCount = 0;
for (int i=0; i<fromDecode->size; i++) {
@@ -367,9 +364,6 @@ template <class Impl>
void
DefaultRename<Impl>::tick()
{
- // Rename will need to try to rename as many instructions as it
- // has bandwidth, unless it is blocked.
-
wroteToTimeBuffer = false;
blockThisCycle = false;
@@ -454,8 +448,6 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
} else if (renameStatus[tid] == Unblocking) {
renameInsts(tid);
-// ++renameUnblockCycles;
-
if (validInsts()) {
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
@@ -575,7 +567,6 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
insts_to_rename.pop_front();
- //Use skidBuffer with oldest instructions
if (renameStatus[tid] == Unblocking) {
DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename "
"skidBuffer\n",
@@ -711,10 +702,10 @@ void
DefaultRename<Impl>::sortInsts()
{
int insts_from_decode = fromDecode->size;
-
+#ifdef DEBUG
for (int i=0; i < numThreads; i++)
assert(insts[i].empty());
-
+#endif
for (int i = 0; i < insts_from_decode; ++i) {
DynInstPtr inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);
@@ -794,8 +785,8 @@ DefaultRename<Impl>::block(unsigned tid)
wroteToTimeBuffer = true;
}
- // Rename can not go from SerializeStall to Blocked, otherwise it would
- // not know to complete the serialize stall.
+ // Rename cannot go from SerializeStall to Blocked, otherwise
+ // it would not know to complete the serialize stall.
if (renameStatus[tid] != SerializeStall) {
// Set status to Blocked.
renameStatus[tid] = Blocked;
@@ -835,15 +826,11 @@ DefaultRename<Impl>::doSquash(unsigned tid)
InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
-//#if FULL_SYSTEM
-// assert(!historyBuffer[tid].empty());
-//#else
// After a syscall squashes everything, the history buffer may be empty
// but the ROB may still be squashing instructions.
if (historyBuffer[tid].empty()) {
return;
}
-//#endif // FULL_SYSTEM
// Go through the most recent instructions, undoing the mappings
// they did and freeing up the registers.
@@ -896,8 +883,8 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid)
hb_it != historyBuffer[tid].end() &&
(*hb_it).instSeqNum <= inst_seq_num) {
- DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, sequence"
- " number %i.\n",
+ DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, "
+ "[sn:%lli].\n",
tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
freeList->addReg((*hb_it).prevPhysReg);
diff --git a/cpu/o3/rename_map.cc b/cpu/o3/rename_map.cc
index 8ba632e65..fc59058a1 100644
--- a/cpu/o3/rename_map.cc
+++ b/cpu/o3/rename_map.cc
@@ -32,18 +32,12 @@
using namespace std;
-// Todo: Consider making functions inline. Avoid having things that are
-// using the zero register or misc registers from adding on the registers
-// to the free list. Possibly remove the direct communication between
-// this and the freelist. Considering making inline bool functions that
-// determine if the register is a logical int, logical fp, physical int,
-// physical fp, etc.
+// @todo: Consider making inline bool functions that determine if the
+// register is a logical int, logical fp, physical int, physical fp,
+// etc.
SimpleRenameMap::~SimpleRenameMap()
{
- // Delete the rename maps as they were allocated with new.
- //delete [] intRenameMap;
- //delete [] floatRenameMap;
}
void
@@ -105,7 +99,8 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs,
// Although the index refers purely to architected registers, because
// the floating reg indices come after the integer reg indices, they
// may exceed the size of a normal RegIndex (short).
- for (PhysRegIndex index = numLogicalIntRegs; index < numLogicalRegs; ++index)
+ for (PhysRegIndex index = numLogicalIntRegs;
+ index < numLogicalRegs; ++index)
{
floatRenameMap[index].physical_reg = freg_idx++;
}
@@ -132,14 +127,10 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs,
void
SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
{
- //Setup the interface to the freelist.
freeList = fl_ptr;
}
-// Don't allow this stage to fault; force that check to the rename stage.
-// Simply ask to rename a logical register and get back a new physical
-// register index.
SimpleRenameMap::RenameInfo
SimpleRenameMap::rename(RegIndex arch_reg)
{
@@ -152,13 +143,11 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// requested architected register.
prev_reg = intRenameMap[arch_reg].physical_reg;
- // If it's not referencing the zero register, then mark the register
- // as not ready.
+ // If it's not referencing the zero register, then rename the
+ // register.
if (arch_reg != intZeroReg) {
- // Get a free physical register to rename to.
renamed_reg = freeList->getIntReg();
- // Update the integer rename map.
intRenameMap[arch_reg].physical_reg = renamed_reg;
assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
@@ -168,20 +157,15 @@ SimpleRenameMap::rename(RegIndex arch_reg)
renamed_reg = intZeroReg;
}
} else if (arch_reg < numLogicalRegs) {
- // Subtract off the base offset for floating point registers.
-// arch_reg = arch_reg - numLogicalIntRegs;
-
// Record the current physical register that is renamed to the
// requested architected register.
prev_reg = floatRenameMap[arch_reg].physical_reg;
- // If it's not referencing the zero register, then mark the register
- // as not ready.
+ // If it's not referencing the zero register, then rename the
+ // register.
if (arch_reg != floatZeroReg) {
- // Get a free floating point register to rename to.
renamed_reg = freeList->getFloatReg();
- // Update the floating point rename map.
floatRenameMap[arch_reg].physical_reg = renamed_reg;
assert(renamed_reg < numPhysicalRegs &&
@@ -194,10 +178,10 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// Subtract off the base offset for miscellaneous registers.
arch_reg = arch_reg - numLogicalRegs;
- // No renaming happens to the misc. registers. They are simply the
- // registers that come after all the physical registers; thus
- // take the base architected register and add the physical registers
- // to it.
+ // No renaming happens to the misc. registers. They are
+ // simply the registers that come after all the physical
+ // registers; thus take the base architected register and add
+ // the physical registers to it.
renamed_reg = arch_reg + numPhysicalRegs;
// Set the previous register to the same register; mainly it must be
@@ -211,17 +195,12 @@ SimpleRenameMap::rename(RegIndex arch_reg)
return RenameInfo(renamed_reg, prev_reg);
}
-//Perhaps give this a pair as a return value, of the physical register
-//and whether or not it's ready.
PhysRegIndex
SimpleRenameMap::lookup(RegIndex arch_reg)
{
if (arch_reg < numLogicalIntRegs) {
return intRenameMap[arch_reg].physical_reg;
} else if (arch_reg < numLogicalRegs) {
- // Subtract off the base FP offset.
-// arch_reg = arch_reg - numLogicalIntRegs;
-
return floatRenameMap[arch_reg].physical_reg;
} else {
// Subtract off the misc registers offset.
@@ -233,51 +212,23 @@ SimpleRenameMap::lookup(RegIndex arch_reg)
}
}
-// In this implementation the miscellaneous registers do not actually rename,
-// so this function does not allow you to try to change their mappings.
void
SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
{
+ // In this implementation the miscellaneous registers do not
+ // actually rename, so this function does not allow you to try to
+ // change their mappings.
if (arch_reg < numLogicalIntRegs) {
DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n",
(int)arch_reg, renamed_reg);
intRenameMap[arch_reg].physical_reg = renamed_reg;
} else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) {
-
-
DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
(int)arch_reg - numLogicalIntRegs, renamed_reg);
floatRenameMap[arch_reg].physical_reg = renamed_reg;
}
-
- //assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs));
-}
-
-void
-SimpleRenameMap::squash(vector<RegIndex> freed_regs,
- vector<UnmapInfo> unmaps)
-{
- panic("Not sure this function should be called.");
-
- // Not sure the rename map should be able to access the free list
- // like this.
- while (!freed_regs.empty()) {
- RegIndex free_register = freed_regs.back();
-
- if (free_register < numPhysicalIntRegs) {
- freeList->addIntReg(free_register);
- } else {
- // Subtract off the base FP dependence tag.
- free_register = free_register - numPhysicalIntRegs;
- freeList->addFloatReg(free_register);
- }
-
- freed_regs.pop_back();
- }
-
- // Take unmap info and roll back the rename map.
}
int
diff --git a/cpu/o3/rename_map.hh b/cpu/o3/rename_map.hh
index 3ecbe45c3..d7e49ae83 100644
--- a/cpu/o3/rename_map.hh
+++ b/cpu/o3/rename_map.hh
@@ -101,9 +101,6 @@ class SimpleRenameMap
*/
void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg);
- void squash(std::vector<RegIndex> freed_regs,
- std::vector<UnmapInfo> unmaps);
-
int numFreeEntries();
private:
@@ -153,7 +150,7 @@ class SimpleRenameMap
};
//Change this to private
- public:
+ private:
/** Integer rename map. */
std::vector<RenameEntry> intRenameMap;
diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh
index 0748850ea..e05eebe5a 100644
--- a/cpu/o3/rob.hh
+++ b/cpu/o3/rob.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,9 +53,7 @@ class ROB
enum Status {
Running,
Idle,
- ROBSquashing,
- DcacheMissStall,
- DcacheMissComplete
+ ROBSquashing
};
/** SMT ROB Sharing Policy */
@@ -112,7 +110,7 @@ class ROB
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the head of the ROB.
*/
- DynInstPtr readHeadInst();
+// DynInstPtr readHeadInst();
/** Returns a pointer to the head instruction of a specific thread within
* the ROB.
@@ -124,7 +122,7 @@ class ROB
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the tail of the ROB.
*/
- DynInstPtr readTailInst();
+// DynInstPtr readTailInst();
/** Returns a pointer to the tail instruction of a specific thread within
* the ROB.
@@ -133,7 +131,7 @@ class ROB
DynInstPtr readTailInst(unsigned tid);
/** Retires the head instruction, removing it from the ROB. */
- void retireHead();
+// void retireHead();
/** Retires the head instruction of a specific thread, removing it from the
* ROB.
@@ -141,7 +139,7 @@ class ROB
void retireHead(unsigned tid);
/** Is the oldest instruction across all threads ready. */
- bool isHeadReady();
+// bool isHeadReady();
/** Is the oldest instruction across a particular thread ready. */
bool isHeadReady(unsigned tid);
@@ -200,35 +198,35 @@ class ROB
void updateTail();
/** Reads the PC of the oldest head instruction. */
- uint64_t readHeadPC();
+// uint64_t readHeadPC();
/** Reads the PC of the head instruction of a specific thread. */
- uint64_t readHeadPC(unsigned tid);
+// uint64_t readHeadPC(unsigned tid);
/** Reads the next PC of the oldest head instruction. */
- uint64_t readHeadNextPC();
+// uint64_t readHeadNextPC();
/** Reads the next PC of the head instruction of a specific thread. */
- uint64_t readHeadNextPC(unsigned tid);
+// uint64_t readHeadNextPC(unsigned tid);
/** Reads the sequence number of the oldest head instruction. */
- InstSeqNum readHeadSeqNum();
+// InstSeqNum readHeadSeqNum();
/** Reads the sequence number of the head instruction of a specific thread.
*/
- InstSeqNum readHeadSeqNum(unsigned tid);
+// InstSeqNum readHeadSeqNum(unsigned tid);
/** Reads the PC of the youngest tail instruction. */
- uint64_t readTailPC();
+// uint64_t readTailPC();
/** Reads the PC of the tail instruction of a specific thread. */
- uint64_t readTailPC(unsigned tid);
+// uint64_t readTailPC(unsigned tid);
/** Reads the sequence number of the youngest tail instruction. */
- InstSeqNum readTailSeqNum();
+// InstSeqNum readTailSeqNum();
/** Reads the sequence number of tail instruction of a specific thread. */
- InstSeqNum readTailSeqNum(unsigned tid);
+// InstSeqNum readTailSeqNum(unsigned tid);
/** Checks if the ROB is still in the process of squashing instructions.
* @retval Whether or not the ROB is done squashing.
diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh
index 02a4bfbee..25e0c80fd 100644
--- a/cpu/o3/rob_impl.hh
+++ b/cpu/o3/rob_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -201,20 +201,15 @@ template <class Impl>
void
ROB<Impl>::insertInst(DynInstPtr &inst)
{
- // Make sure we have the right number of instructions.
//assert(numInstsInROB == countInsts());
-
- // Make sure the instruction is valid.
assert(inst);
DPRINTF(ROB, "Adding inst PC %#x to the ROB.\n", inst->readPC());
- // If the ROB is full then exit.
assert(numInstsInROB != numEntries);
int tid = inst->threadNumber;
- // Place into ROB
instList[tid].push_back(inst);
//Set Up head iterator if this is the 1st instruction in the ROB
@@ -228,10 +223,8 @@ ROB<Impl>::insertInst(DynInstPtr &inst)
tail = instList[tid].end();
tail--;
- // Mark as set in ROB
inst->setInROB();
- // Increment ROB count
++numInstsInROB;
++threadEntries[tid];
@@ -242,6 +235,7 @@ ROB<Impl>::insertInst(DynInstPtr &inst)
// Whatever calls this function needs to ensure that it properly frees up
// registers prior to this function.
+/*
template <class Impl>
void
ROB<Impl>::retireHead()
@@ -249,7 +243,6 @@ ROB<Impl>::retireHead()
//assert(numInstsInROB == countInsts());
assert(numInstsInROB > 0);
- // Get the head ROB instruction's TID.
int tid = (*head)->threadNumber;
retireHead(tid);
@@ -258,6 +251,7 @@ ROB<Impl>::retireHead()
tail = instList[tid].end();
}
}
+*/
template <class Impl>
void
@@ -271,18 +265,15 @@ ROB<Impl>::retireHead(unsigned tid)
DynInstPtr head_inst = (*head_it);
- // Make certain this can retire.
assert(head_inst->readyToCommit());
DPRINTF(ROB, "[tid:%u]: Retiring head instruction, "
"instruction PC %#x,[sn:%lli]\n", tid, head_inst->readPC(),
head_inst->seqNum);
- // Keep track of how many instructions are in the ROB.
--numInstsInROB;
--threadEntries[tid];
- //Mark DynInstFlags
head_inst->removeInROB();
head_inst->setCommitted();
@@ -291,12 +282,12 @@ ROB<Impl>::retireHead(unsigned tid)
//Update "Global" Head of ROB
updateHead();
- // A special case is needed if the instruction being retired is the
- // only instruction in the ROB; otherwise the tail iterator will become
- // invalidated.
+ // @todo: A special case is needed if the instruction being
+ // retired is the only instruction in the ROB; otherwise the tail
+ // iterator will become invalidated.
cpu->removeFrontInst(head_inst);
}
-
+/*
template <class Impl>
bool
ROB<Impl>::isHeadReady()
@@ -307,7 +298,7 @@ ROB<Impl>::isHeadReady()
return false;
}
-
+*/
template <class Impl>
bool
ROB<Impl>::isHeadReady(unsigned tid)
@@ -537,7 +528,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doSquash(tid);
}
}
-
+/*
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst()
@@ -549,7 +540,7 @@ ROB<Impl>::readHeadInst()
return dummyInst;
}
}
-
+*/
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst(unsigned tid)
@@ -564,7 +555,7 @@ ROB<Impl>::readHeadInst(unsigned tid)
return dummyInst;
}
}
-
+/*
template <class Impl>
uint64_t
ROB<Impl>::readHeadPC()
@@ -608,7 +599,6 @@ ROB<Impl>::readHeadNextPC(unsigned tid)
return (*head_thread)->readNextPC();
}
-
template <class Impl>
InstSeqNum
ROB<Impl>::readHeadSeqNum()
@@ -637,7 +627,7 @@ ROB<Impl>::readTailInst()
return (*tail);
}
-
+*/
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readTailInst(unsigned tid)
@@ -650,7 +640,7 @@ ROB<Impl>::readTailInst(unsigned tid)
return *tail_thread;
}
-
+/*
template <class Impl>
uint64_t
ROB<Impl>::readTailPC()
@@ -698,4 +688,4 @@ ROB<Impl>::readTailSeqNum(unsigned tid)
return (*tail_thread)->seqNum;
}
-
+*/
diff --git a/cpu/o3/scoreboard.cc b/cpu/o3/scoreboard.cc
index 87b0aee94..b0e433620 100644
--- a/cpu/o3/scoreboard.cc
+++ b/cpu/o3/scoreboard.cc
@@ -99,6 +99,7 @@ Scoreboard::unsetReg(PhysRegIndex ready_reg)
if (ready_reg == zeroRegIdx ||
ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
// Don't do anything if int or fp zero reg.
+ return;
}
regScoreBoard[ready_reg] = 0;
diff --git a/cpu/o3/store_set.cc b/cpu/o3/store_set.cc
index a685646f3..0c957c8c7 100644
--- a/cpu/o3/store_set.cc
+++ b/cpu/o3/store_set.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -278,11 +278,6 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
void
StoreSet::squash(InstSeqNum squashed_num, unsigned tid)
{
- // Not really sure how to do this well.
- // Generally this is small enough that it should be okay; short circuit
- // evaluation should take care of invalid entries.
- // Maybe keep a list of valid LFST's? Really ugly either way...
-
DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
squashed_num);
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh
index 17719bdeb..2c9788e4b 100644
--- a/cpu/o3/thread_state.hh
+++ b/cpu/o3/thread_state.hh
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
@@ -15,27 +42,17 @@ class EndQuiesceEvent;
class FunctionProfile;
class ProfileNode;
#else
-class Process;
class FunctionalMemory;
+class Process;
#endif
-// In the new CPU case this may be quite small...It depends on what I define
-// ThreadState to be. Currently it's only the state that exists within
-// ExecContext basically. Leaves the interface and manipulation up to the
-// CPU. Not sure this is useful/flexible...probably can be if I can avoid
-// including state here that parts of the pipeline can't modify directly,
-// or at least don't let them. The only problem is for state that's needed
-// per thread, per structure. I.e. rename table, memreqs.
-// On the other hand, it might be nice to not have to pay the extra pointer
-// lookup to get frequently used state such as a memreq (that isn't used much
-// elsewhere)...
-
-// Maybe this ozone thread state should only really have committed state?
-// I need to think about why I'm using this and what it's useful for. Clearly
-// has benefits for SMT; basically serves same use as CPUExecContext.
-// Makes the ExecContext proxy easier. Gives organization/central access point
-// to state of a thread that can be accessed normally (i.e. not in-flight
-// stuff within a OoO processor). Does this need an XC proxy within it?
+/**
+ * Class that has various thread state, such as the status, the
+ * current instruction being processed, whether or not the thread has
+ * a trap pending or is being externally updated, the ExecContext
+ * proxy pointer, etc. It also handles anything related to a specific
+ * thread's process, such as syscalls and checking valid addresses.
+ */
template <class Impl>
struct O3ThreadState : public ThreadState {
typedef ExecContext::Status Status;
@@ -43,7 +60,7 @@ struct O3ThreadState : public ThreadState {
Status _status;
- // Current instruction?
+ // Current instruction
TheISA::MachInst inst;
private:
FullCPU *cpu;
@@ -80,51 +97,11 @@ struct O3ThreadState : public ThreadState {
void setStatus(Status new_status) { _status = new_status; }
#if !FULL_SYSTEM
-
- Fault dummyTranslation(MemReqPtr &req)
- {
-#if 0
- assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
- // put the asid in the upper 16 bits of the paddr
- req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
- req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
- return NoFault;
- }
- Fault translateInstReq(MemReqPtr &req)
- {
- return dummyTranslation(req);
- }
- Fault translateDataReadReq(MemReqPtr &req)
- {
- return dummyTranslation(req);
- }
- Fault translateDataWriteReq(MemReqPtr &req)
- {
- return dummyTranslation(req);
- }
-
bool validInstAddr(Addr addr)
{ return process->validInstAddr(addr); }
bool validDataAddr(Addr addr)
{ return process->validDataAddr(addr); }
-#else
- Fault translateInstReq(MemReqPtr &req)
- {
- return cpu->itb->translate(req);
- }
-
- Fault translateDataReadReq(MemReqPtr &req)
- {
- return cpu->dtb->translate(req, false);
- }
-
- Fault translateDataWriteReq(MemReqPtr &req)
- {
- return cpu->dtb->translate(req, true);
- }
#endif
bool misspeculating() { return false; }