summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Lim <ktlim@umich.edu>2006-08-02 12:05:34 -0400
committerKevin Lim <ktlim@umich.edu>2006-08-02 12:05:34 -0400
commitcbfbb7bc56630ddefb95625a6da87b3c1da9599d (patch)
tree3abd77fddcc27cba0ac492368d6b3b37538857a8
parent8d220c5c1024bc80c4f1365bc4ef542480acaac5 (diff)
downloadgem5-cbfbb7bc56630ddefb95625a6da87b3c1da9599d.tar.xz
Updates to bring CPU portion of m5 up-to-date with newmem.
--HG-- extra : convert_revision : 00e6eefb24e6ffd9c7c5d8165db26fbf6199fdc4
-rw-r--r--base/timebuf.hh5
-rw-r--r--cpu/base_dyn_inst.cc35
-rw-r--r--cpu/base_dyn_inst.hh193
-rw-r--r--cpu/o3/alpha_cpu_builder.cc33
-rw-r--r--cpu/o3/alpha_cpu_impl.hh2
-rw-r--r--cpu/o3/alpha_params.hh14
-rw-r--r--cpu/o3/commit.hh13
-rw-r--r--cpu/o3/commit_impl.hh62
-rw-r--r--cpu/o3/cpu.cc66
-rw-r--r--cpu/o3/cpu.hh6
-rw-r--r--cpu/o3/decode_impl.hh9
-rw-r--r--cpu/o3/fetch.hh6
-rw-r--r--cpu/o3/fetch_impl.hh19
-rw-r--r--cpu/o3/iew.hh107
-rw-r--r--cpu/o3/iew_impl.hh157
-rw-r--r--cpu/o3/inst_queue.hh2
-rw-r--r--cpu/o3/inst_queue_impl.hh23
-rw-r--r--cpu/o3/lsq.hh3
-rw-r--r--cpu/o3/lsq_impl.hh10
-rw-r--r--cpu/o3/lsq_unit.hh53
-rw-r--r--cpu/o3/lsq_unit_impl.hh47
-rw-r--r--cpu/o3/mem_dep_unit.cc2
-rw-r--r--cpu/o3/mem_dep_unit_impl.hh14
-rw-r--r--cpu/o3/regfile.hh16
-rw-r--r--cpu/o3/rename_impl.hh4
-rw-r--r--cpu/o3/rob.hh2
-rw-r--r--cpu/o3/rob_impl.hh14
-rw-r--r--cpu/ozone/cpu.hh45
-rw-r--r--cpu/ozone/cpu_impl.hh53
-rw-r--r--cpu/ozone/inorder_back_end_impl.hh2
-rw-r--r--cpu/ozone/inst_queue_impl.hh8
-rw-r--r--cpu/ozone/lw_back_end.hh78
-rw-r--r--cpu/ozone/lw_back_end_impl.hh138
-rw-r--r--cpu/ozone/lw_lsq.hh2
-rw-r--r--cpu/ozone/thread_state.hh2
-rw-r--r--cpu/thread_state.hh1
-rw-r--r--python/m5/objects/AlphaFullCPU.py13
37 files changed, 685 insertions, 574 deletions
diff --git a/base/timebuf.hh b/base/timebuf.hh
index f6b5b2781..db34528d8 100644
--- a/base/timebuf.hh
+++ b/base/timebuf.hh
@@ -212,6 +212,11 @@ class TimeBuffer
{
return wire(this, 0);
}
+
+ int getSize()
+ {
+ return size;
+ }
};
#endif // __BASE_TIMEBUF_HH__
diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc
index 64a995689..1a52279cc 100644
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@@ -100,32 +100,15 @@ BaseDynInst<Impl>::initVars()
readyRegs = 0;
- completed = false;
- resultReady = false;
- canIssue = false;
- issued = false;
- executed = false;
- canCommit = false;
- committed = false;
- squashed = false;
- squashedInIQ = false;
- squashedInLSQ = false;
- squashedInROB = false;
+ instResult.integer = 0;
+
+ status.reset();
+
eaCalcDone = false;
memOpDone = false;
+
lqIdx = -1;
sqIdx = -1;
- reachedCommit = false;
-
- blockingInst = false;
- recoverInst = false;
-
- iqEntry = false;
- robEntry = false;
-
- serializeBefore = false;
- serializeAfter = false;
- serializeHandled = false;
// Eventually make this a parameter.
threadNumber = 0;
@@ -395,7 +378,7 @@ void
BaseDynInst<Impl>::markSrcRegReady()
{
if (++readyRegs == numSrcRegs()) {
- canIssue = true;
+ status.set(CanIssue);
}
}
@@ -403,13 +386,9 @@ template <class Impl>
void
BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx)
{
- ++readyRegs;
-
_readySrcRegIdx[src_idx] = true;
- if (readyRegs == numSrcRegs()) {
- canIssue = true;
- }
+ markSrcRegReady();
}
template <class Impl>
diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh
index 388ea4a8d..01f6be185 100644
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@@ -127,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** The sequence number of the instruction. */
InstSeqNum seqNum;
- /** Is the instruction in the IQ */
- bool iqEntry;
-
- /** Is the instruction in the ROB */
- bool robEntry;
-
- /** Is the instruction in the LSQ */
- bool lsqEntry;
-
- /** Is the instruction completed. */
- bool completed;
-
- /** Is the instruction's result ready. */
- bool resultReady;
-
- /** Can this instruction issue. */
- bool canIssue;
-
- /** Has this instruction issued. */
- bool issued;
-
- /** Has this instruction executed (or made it through execute) yet. */
- bool executed;
-
- /** Can this instruction commit. */
- bool canCommit;
-
- /** Is this instruction committed. */
- bool committed;
-
- /** Is this instruction squashed. */
- bool squashed;
-
- /** Is this instruction squashed in the instruction queue. */
- bool squashedInIQ;
-
- /** Is this instruction squashed in the instruction queue. */
- bool squashedInLSQ;
-
- /** Is this instruction squashed in the instruction queue. */
- bool squashedInROB;
-
- /** Is this a recover instruction. */
- bool recoverInst;
-
- /** Is this a thread blocking instruction. */
- bool blockingInst; /* this inst has called thread_block() */
+ enum Status {
+ IqEntry, /// Instruction is in the IQ
+ RobEntry, /// Instruction is in the ROB
+ LsqEntry, /// Instruction is in the LSQ
+ Completed, /// Instruction has completed
+ ResultReady, /// Instruction has its result
+ CanIssue, /// Instruction can issue and execute
+ Issued, /// Instruction has issued
+ Executed, /// Instruction has executed
+ CanCommit, /// Instruction can commit
+ AtCommit, /// Instruction has reached commit
+ Committed, /// Instruction has committed
+ Squashed, /// Instruction is squashed
+ SquashedInIQ, /// Instruction is squashed in the IQ
+ SquashedInLSQ, /// Instruction is squashed in the LSQ
+ SquashedInROB, /// Instruction is squashed in the ROB
+ RecoverInst, /// Is a recover instruction
+ BlockingInst, /// Is a blocking instruction
+ ThreadsyncWait, /// Is a thread synchronization instruction
+ SerializeBefore, /// Needs to serialize on
+ /// instructions ahead of it
+ SerializeAfter, /// Needs to serialize instructions behind it
+ SerializeHandled, /// Serialization has been handled
+ NumStatus
+ };
- /** Is this a thread syncrhonization instruction. */
- bool threadsyncWait;
+ /** The status of this BaseDynInst. Several bits can be set. */
+ std::bitset<NumStatus> status;
/** The thread this instruction is from. */
short threadNumber;
@@ -351,9 +329,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
bool isThreadSync() const { return staticInst->isThreadSync(); }
bool isSerializing() const { return staticInst->isSerializing(); }
bool isSerializeBefore() const
- { return staticInst->isSerializeBefore() || serializeBefore; }
+ { return staticInst->isSerializeBefore() || status[SerializeBefore]; }
bool isSerializeAfter() const
- { return staticInst->isSerializeAfter() || serializeAfter; }
+ { return staticInst->isSerializeAfter() || status[SerializeAfter]; }
bool isMemBarrier() const { return staticInst->isMemBarrier(); }
bool isWriteBarrier() const { return staticInst->isWriteBarrier(); }
bool isNonSpeculative() const { return staticInst->isNonSpeculative(); }
@@ -362,41 +340,32 @@ class BaseDynInst : public FastAlloc, public RefCounted
bool isUnverifiable() const { return staticInst->isUnverifiable(); }
/** Temporarily sets this instruction as a serialize before instruction. */
- void setSerializeBefore() { serializeBefore = true; }
+ void setSerializeBefore() { status.set(SerializeBefore); }
/** Clears the serializeBefore part of this instruction. */
- void clearSerializeBefore() { serializeBefore = false; }
+ void clearSerializeBefore() { status.reset(SerializeBefore); }
/** Checks if this serializeBefore is only temporarily set. */
- bool isTempSerializeBefore() { return serializeBefore; }
-
- /** Tracks if instruction has been externally set as serializeBefore. */
- bool serializeBefore;
+ bool isTempSerializeBefore() { return status[SerializeBefore]; }
/** Temporarily sets this instruction as a serialize after instruction. */
- void setSerializeAfter() { serializeAfter = true; }
+ void setSerializeAfter() { status.set(SerializeAfter); }
/** Clears the serializeAfter part of this instruction.*/
- void clearSerializeAfter() { serializeAfter = false; }
+ void clearSerializeAfter() { status.reset(SerializeAfter); }
/** Checks if this serializeAfter is only temporarily set. */
- bool isTempSerializeAfter() { return serializeAfter; }
+ bool isTempSerializeAfter() { return status[SerializeAfter]; }
- /** Tracks if instruction has been externally set as serializeAfter. */
- bool serializeAfter;
+ /** Sets the serialization part of this instruction as handled. */
+ void setSerializeHandled() { status.set(SerializeHandled); }
/** Checks if the serialization part of this instruction has been
* handled. This does not apply to the temporary serializing
* state; it only applies to this instruction's own permanent
* serializing state.
*/
- bool isSerializeHandled() { return serializeHandled; }
-
- /** Sets the serialization part of this instruction as handled. */
- void setSerializeHandled() { serializeHandled = true; }
-
- /** Whether or not the serialization of this instruction has been handled. */
- bool serializeHandled;
+ bool isSerializeHandled() { return status[SerializeHandled]; }
/** Returns the opclass of this instruction. */
OpClass opClass() const { return staticInst->opClass(); }
@@ -463,106 +432,112 @@ class BaseDynInst : public FastAlloc, public RefCounted
}
/** Sets this instruction as completed. */
- void setCompleted() { completed = true; }
+ void setCompleted() { status.set(Completed); }
/** Returns whether or not this instruction is completed. */
- bool isCompleted() const { return completed; }
+ bool isCompleted() const { return status[Completed]; }
- void setResultReady() { resultReady = true; }
+ /** Marks the result as ready. */
+ void setResultReady() { status.set(ResultReady); }
- bool isResultReady() const { return resultReady; }
+ /** Returns whether or not the result is ready. */
+ bool isResultReady() const { return status[ResultReady]; }
/** Sets this instruction as ready to issue. */
- void setCanIssue() { canIssue = true; }
+ void setCanIssue() { status.set(CanIssue); }
/** Returns whether or not this instruction is ready to issue. */
- bool readyToIssue() const { return canIssue; }
+ bool readyToIssue() const { return status[CanIssue]; }
/** Sets this instruction as issued from the IQ. */
- void setIssued() { issued = true; }
+ void setIssued() { status.set(Issued); }
/** Returns whether or not this instruction has issued. */
- bool isIssued() const { return issued; }
+ bool isIssued() const { return status[Issued]; }
/** Sets this instruction as executed. */
- void setExecuted() { executed = true; }
+ void setExecuted() { status.set(Executed); }
/** Returns whether or not this instruction has executed. */
- bool isExecuted() const { return executed; }
+ bool isExecuted() const { return status[Executed]; }
/** Sets this instruction as ready to commit. */
- void setCanCommit() { canCommit = true; }
+ void setCanCommit() { status.set(CanCommit); }
/** Clears this instruction as being ready to commit. */
- void clearCanCommit() { canCommit = false; }
+ void clearCanCommit() { status.reset(CanCommit); }
/** Returns whether or not this instruction is ready to commit. */
- bool readyToCommit() const { return canCommit; }
+ bool readyToCommit() const { return status[CanCommit]; }
+
+ void setAtCommit() { status.set(AtCommit); }
+
+ bool isAtCommit() { return status[AtCommit]; }
/** Sets this instruction as committed. */
- void setCommitted() { committed = true; }
+ void setCommitted() { status.set(Committed); }
/** Returns whether or not this instruction is committed. */
- bool isCommitted() const { return committed; }
+ bool isCommitted() const { return status[Committed]; }
/** Sets this instruction as squashed. */
- void setSquashed() { squashed = true; }
+ void setSquashed() { status.set(Squashed); }
/** Returns whether or not this instruction is squashed. */
- bool isSquashed() const { return squashed; }
+ bool isSquashed() const { return status[Squashed]; }
//Instruction Queue Entry
//-----------------------
/** Sets this instruction as a entry the IQ. */
- void setInIQ() { iqEntry = true; }
+ void setInIQ() { status.set(IqEntry); }
/** Sets this instruction as a entry the IQ. */
- void removeInIQ() { iqEntry = false; }
+ void clearInIQ() { status.reset(IqEntry); }
+
+ /** Returns whether or not this instruction has issued. */
+ bool isInIQ() const { return status[IqEntry]; }
/** Sets this instruction as squashed in the IQ. */
- void setSquashedInIQ() { squashedInIQ = true; squashed = true;}
+ void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);}
/** Returns whether or not this instruction is squashed in the IQ. */
- bool isSquashedInIQ() const { return squashedInIQ; }
-
- /** Returns whether or not this instruction has issued. */
- bool isInIQ() const { return iqEntry; }
+ bool isSquashedInIQ() const { return status[SquashedInIQ]; }
//Load / Store Queue Functions
//-----------------------
/** Sets this instruction as a entry the LSQ. */
- void setInLSQ() { lsqEntry = true; }
+ void setInLSQ() { status.set(LsqEntry); }
/** Sets this instruction as a entry the LSQ. */
- void removeInLSQ() { lsqEntry = false; }
+ void removeInLSQ() { status.reset(LsqEntry); }
+
+ /** Returns whether or not this instruction is in the LSQ. */
+ bool isInLSQ() const { return status[LsqEntry]; }
/** Sets this instruction as squashed in the LSQ. */
- void setSquashedInLSQ() { squashedInLSQ = true;}
+ void setSquashedInLSQ() { status.set(SquashedInLSQ);}
/** Returns whether or not this instruction is squashed in the LSQ. */
- bool isSquashedInLSQ() const { return squashedInLSQ; }
-
- /** Returns whether or not this instruction is in the LSQ. */
- bool isInLSQ() const { return lsqEntry; }
+ bool isSquashedInLSQ() const { return status[SquashedInLSQ]; }
//Reorder Buffer Functions
//-----------------------
/** Sets this instruction as a entry the ROB. */
- void setInROB() { robEntry = true; }
+ void setInROB() { status.set(RobEntry); }
/** Sets this instruction as a entry the ROB. */
- void removeInROB() { robEntry = false; }
+ void clearInROB() { status.reset(RobEntry); }
+
+ /** Returns whether or not this instruction is in the ROB. */
+ bool isInROB() const { return status[RobEntry]; }
/** Sets this instruction as squashed in the ROB. */
- void setSquashedInROB() { squashedInROB = true; }
+ void setSquashedInROB() { status.set(SquashedInROB); }
/** Returns whether or not this instruction is squashed in the ROB. */
- bool isSquashedInROB() const { return squashedInROB; }
-
- /** Returns whether or not this instruction is in the ROB. */
- bool isInROB() const { return robEntry; }
+ bool isSquashedInROB() const { return status[SquashedInROB]; }
/** Read the PC of this instruction. */
const Addr readPC() const { return PC; }
@@ -619,8 +594,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Store queue index. */
int16_t sqIdx;
- bool reachedCommit;
-
/** Iterator pointing to this BaseDynInst in the list of all insts. */
ListIt instListIt;
@@ -636,7 +609,7 @@ template<class T>
inline Fault
BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
{
- if (executed) {
+ if (status[Executed]) {
fault = cpu->read(req, data, lqIdx);
return fault;
}
diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc
index 08d42cd46..c563fbef3 100644
--- a/cpu/o3/alpha_cpu_builder.cc
+++ b/cpu/o3/alpha_cpu_builder.cc
@@ -94,12 +94,10 @@ Param<unsigned> renameWidth;
Param<unsigned> commitToIEWDelay;
Param<unsigned> renameToIEWDelay;
Param<unsigned> issueToExecuteDelay;
+Param<unsigned> dispatchWidth;
Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-Param<unsigned> executeBranchWidth;
-Param<unsigned> executeMemoryWidth;
+Param<unsigned> wbWidth;
+Param<unsigned> wbDepth;
SimObjectParam<FUPool *> fuPool;
Param<unsigned> iewToCommitDelay;
@@ -109,6 +107,9 @@ Param<unsigned> squashWidth;
Param<Tick> trapLatency;
Param<Tick> fetchTrapLatency;
+Param<unsigned> backComSize;
+Param<unsigned> forwardComSize;
+
Param<std::string> predType;
Param<unsigned> localPredictorSize;
Param<unsigned> localCtrBits;
@@ -219,12 +220,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
"Issue/Execute/Writeback delay"),
INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
"to the IEW stage)"),
+ INIT_PARAM(dispatchWidth, "Dispatch width"),
INIT_PARAM(issueWidth, "Issue width"),
- INIT_PARAM(executeWidth, "Execute width"),
- INIT_PARAM(executeIntWidth, "Integer execute width"),
- INIT_PARAM(executeFloatWidth, "Floating point execute width"),
- INIT_PARAM(executeBranchWidth, "Branch execute width"),
- INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+ INIT_PARAM(wbWidth, "Writeback width"),
+ INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
@@ -235,6 +234,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
+ INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+ INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
+
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
INIT_PARAM(localCtrBits, "Bits per counter"),
@@ -353,12 +355,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->commitToIEWDelay = commitToIEWDelay;
params->renameToIEWDelay = renameToIEWDelay;
params->issueToExecuteDelay = issueToExecuteDelay;
+ params->dispatchWidth = dispatchWidth;
params->issueWidth = issueWidth;
- params->executeWidth = executeWidth;
- params->executeIntWidth = executeIntWidth;
- params->executeFloatWidth = executeFloatWidth;
- params->executeBranchWidth = executeBranchWidth;
- params->executeMemoryWidth = executeMemoryWidth;
+ params->wbWidth = wbWidth;
+ params->wbDepth = wbDepth;
params->fuPool = fuPool;
params->iewToCommitDelay = iewToCommitDelay;
@@ -368,6 +368,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->trapLatency = trapLatency;
params->fetchTrapLatency = fetchTrapLatency;
+ params->backComSize = backComSize;
+ params->forwardComSize = forwardComSize;
+
params->predType = predType;
params->localPredictorSize = localPredictorSize;
params->localCtrBits = localCtrBits;
diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh
index f39fdf6b6..1bf0652cd 100644
--- a/cpu/o3/alpha_cpu_impl.hh
+++ b/cpu/o3/alpha_cpu_impl.hh
@@ -383,7 +383,7 @@ AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc)
}
// Copy the misc regs.
- cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
+ TheISA::copyMiscRegs(xc, this);
// Then finally set the PC and the next PC.
cpu->setPC(xc->readPC(), tid);
diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh
index f0836a9fd..4ab130d02 100644
--- a/cpu/o3/alpha_params.hh
+++ b/cpu/o3/alpha_params.hh
@@ -106,12 +106,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params
unsigned commitToIEWDelay;
unsigned renameToIEWDelay;
unsigned issueToExecuteDelay;
+ unsigned dispatchWidth;
unsigned issueWidth;
- unsigned executeWidth;
- unsigned executeIntWidth;
- unsigned executeFloatWidth;
- unsigned executeBranchWidth;
- unsigned executeMemoryWidth;
+ unsigned wbWidth;
+ unsigned wbDepth;
FUPool *fuPool;
//
@@ -125,6 +123,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params
Tick fetchTrapLatency;
//
+ // Timebuffer sizes
+ //
+ unsigned backComSize;
+ unsigned forwardComSize;
+
+ //
// Branch predictor (BP, BTB, RAS)
//
std::string predType;
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh
index d93822394..b153effc4 100644
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -160,10 +160,6 @@ class DefaultCommit
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
- void setFetchStage(Fetch *fetch_stage);
-
- Fetch *fetchStage;
-
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
@@ -367,11 +363,6 @@ class DefaultCommit
*/
unsigned renameWidth;
- /** IEW width, in instructions. Used so ROB knows how many
- * instructions to get from the IEW instruction queue.
- */
- unsigned iewWidth;
-
/** Commit width, in instructions. */
unsigned commitWidth;
@@ -392,10 +383,6 @@ class DefaultCommit
*/
Tick trapLatency;
- Tick fetchTrapLatency;
-
- Tick fetchFaultTick;
-
/** The commit PC of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh
index 798f30294..364e685c2 100644
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -71,12 +71,10 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
renameToROBDelay(params->renameToROBDelay),
fetchToCommitDelay(params->commitToFetchDelay),
renameWidth(params->renameWidth),
- iewWidth(params->executeWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads),
switchedOut(false),
- trapLatency(params->trapLatency),
- fetchTrapLatency(params->fetchTrapLatency)
+ trapLatency(params->trapLatency)
{
_status = Active;
_nextStatus = Inactive;
@@ -114,10 +112,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
changedROBNumEntries[i] = false;
trapSquash[i] = false;
xcSquash[i] = false;
+ PC[i] = nextPC[i] = 0;
}
-
- fetchFaultTick = 0;
- fetchTrapWait = 0;
}
template <class Impl>
@@ -240,7 +236,6 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
cpu->activateStage(FullCPU::CommitIdx);
trapLatency = cpu->cycles(trapLatency);
- fetchTrapLatency = cpu->cycles(fetchTrapLatency);
}
template <class Impl>
@@ -299,13 +294,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
template <class Impl>
void
-DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
-{
- fetchStage = fetch_stage;
-}
-
-template <class Impl>
-void
DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
{
iewStage = iew_stage;
@@ -431,7 +419,7 @@ DefaultCommit<Impl>::setNextStatus()
}
}
- assert(squashes == squashCounter);
+ squashCounter = squashes;
// If commit is currently squashing, then it will have activity for the
// next cycle. Set its next status as active.
@@ -536,8 +524,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
commitStatus[tid] = ROBSquashing;
cpu->activityThisCycle();
-
- ++squashCounter;
}
template <class Impl>
@@ -555,8 +541,6 @@ DefaultCommit<Impl>::squashFromXC(unsigned tid)
cpu->activityThisCycle();
xcSquash[tid] = false;
-
- ++squashCounter;
}
template <class Impl>
@@ -571,6 +555,9 @@ DefaultCommit<Impl>::tick()
return;
}
+ if ((*activeThreads).size() <=0)
+ return;
+
list<unsigned>::iterator threads = (*activeThreads).begin();
// Check if any of the threads are done squashing. Change the
@@ -582,10 +569,12 @@ DefaultCommit<Impl>::tick()
if (rob->isDoneSquashing(tid)) {
commitStatus[tid] = Running;
- --squashCounter;
} else {
DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
- "insts this cycle.\n", tid);
+ " insts this cycle.\n", tid);
+ rob->doSquash(tid);
+ toIEW->commitInfo[tid].robSquashing = true;
+ wroteToTimeBuffer = true;
}
}
}
@@ -691,29 +680,7 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
-/*
- if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
- // Record the fault. Wait until it's empty in the ROB.
- // Then handle the trap. Ignore it if there's already a
- // trap pending as fetch will be redirected.
- fetchFault = fromFetch->fetchFault;
- fetchFaultTick = curTick + fetchTrapLatency;
- commitStatus[0] = FetchTrapPending;
- DPRINTF(Commit, "Fault from fetch recorded. Will trap if the "
- "ROB empties without squashing the fault.\n");
- fetchTrapWait = 0;
- }
- // Fetch may tell commit to clear the trap if it's been squashed.
- if (fromFetch->clearFetchFault) {
- DPRINTF(Commit, "Received clear fetch fault signal\n");
- fetchTrapWait = 0;
- if (commitStatus[0] == FetchTrapPending) {
- DPRINTF(Commit, "Clearing fault from fetch\n");
- commitStatus[0] = Running;
- }
- }
-*/
// Not sure which one takes priority. I think if we have
// both, that's a bad sign.
if (trapSquash[tid] == true) {
@@ -741,8 +708,6 @@ DefaultCommit<Impl>::commit()
commitStatus[tid] = ROBSquashing;
- ++squashCounter;
-
// If we want to include the squashing instruction in the squash,
// then use one older sequence number.
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
@@ -944,7 +909,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// and committed this instruction.
thread[tid]->funcExeInst--;
- head_inst->reachedCommit = true;
+ head_inst->setAtCommit();
if (head_inst->isNonSpeculative() ||
head_inst->isStoreConditional() ||
@@ -1060,7 +1025,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
-
+// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
#else // !FULL_SYSTEM
panic("fault (%d) detected @ PC %08p", inst_fault,
@@ -1083,6 +1048,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
+ if (head_inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
// Finally clear the head ROB entry.
rob->retireHead(tid);
diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc
index 8d72bdc41..f1571e61b 100644
--- a/cpu/o3/cpu.cc
+++ b/cpu/o3/cpu.cc
@@ -108,12 +108,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
// For now just have these time buffers be pretty big.
// @todo: Make these time buffer sizes parameters or derived
// from latencies
- timeBuffer(5, 5),
- fetchQueue(5, 5),
- decodeQueue(5, 5),
- renameQueue(5, 5),
- iewQueue(5, 5),
- activityRec(NumStages, 10, params->activity),
+ timeBuffer(params->backComSize, params->forwardComSize),
+ fetchQueue(params->backComSize, params->forwardComSize),
+ decodeQueue(params->backComSize, params->forwardComSize),
+ renameQueue(params->backComSize, params->forwardComSize),
+ iewQueue(params->backComSize, params->forwardComSize),
+ activityRec(NumStages,
+ params->backComSize + params->forwardComSize,
+ params->activity),
globalSeqNum(1),
@@ -180,7 +182,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
commit.setIEWQueue(&iewQueue);
commit.setRenameQueue(&renameQueue);
- commit.setFetchStage(&fetch);
commit.setIEWStage(&iew);
rename.setIEWStage(&iew);
rename.setCommitStage(&commit);
@@ -709,7 +710,7 @@ void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
// Flush out any old data from the time buffers.
- for (int i = 0; i < 10; ++i) {
+ for (int i = 0; i < timeBuffer.getSize(); ++i) {
timeBuffer.advance();
fetchQueue.advance();
decodeQueue.advance();
@@ -758,6 +759,46 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick);
}
+/*
+template <class Impl>
+void
+FullO3CPU<Impl>::serialize(std::ostream &os)
+{
+ BaseCPU::serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ nameOut(os, csprintf("%s.xc.%i", name(), i));
+ temp.copyXC(thread[i]->getXC());
+ temp.serialize(os);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+ BaseCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ temp.copyXC(thread[i]->getXC());
+ temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+ thread[i]->getXC()->copyArchRegs(temp.getXC());
+ }
+}
+*/
template <class Impl>
uint64_t
FullO3CPU<Impl>::readIntReg(int reg_idx)
@@ -866,7 +907,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegSingle(phys_reg, val);
}
@@ -875,7 +917,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegDouble(phys_reg, val);
}
@@ -884,7 +927,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegInt(phys_reg, val);
}
diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh
index f4b19bfb3..ef5c9ae53 100644
--- a/cpu/o3/cpu.hh
+++ b/cpu/o3/cpu.hh
@@ -63,6 +63,12 @@ class BaseFullCPU : public BaseCPU
void regStats();
+ /** Sets this CPU's ID. */
+ void setCpuId(int id) { cpu_id = id; }
+
+ /** Reads this CPU's ID. */
+ int readCpuId() { return cpu_id; }
+
protected:
int cpu_id;
};
diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh
index 0b686375e..e1af4d423 100644
--- a/cpu/o3/decode_impl.hh
+++ b/cpu/o3/decode_impl.hh
@@ -278,7 +278,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].squash = true;
- toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
+ toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
@@ -294,7 +294,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid &&
fromFetch->insts[i]->seqNum > inst->seqNum) {
- fromFetch->insts[i]->squashed = true;
+ fromFetch->insts[i]->setSquashed();
}
}
@@ -343,7 +343,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid) {
- fromFetch->insts[i]->squashed = true;
+ fromFetch->insts[i]->setSquashed();
squash_count++;
}
}
@@ -721,9 +721,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
- inst->setNextPC(inst->branchTarget());
- if (inst->mispredicted()) {
+ if (inst->branchTarget() != inst->readPredTarg()) {
++decodeBranchMispred;
// Might want to set some sort of boolean and just do
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh
index 92a87ab54..0bde56ce9 100644
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -358,6 +358,12 @@ class DefaultFetch
/** The cache line being fetched. */
uint8_t *cacheData[Impl::MaxThreads];
+ /** The PC of the cacheline that has been loaded. */
+ Addr cacheDataPC[Impl::MaxThreads];
+
+ /** Whether or not the cache data is valid. */
+ bool cacheDataValid[Impl::MaxThreads];
+
/** Size of instructions. */
int instSize;
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index a309bd49a..cc09c4a41 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -138,6 +138,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
stalls[tid].decode = 0;
stalls[tid].rename = 0;
@@ -334,6 +336,7 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Wake up the CPU (if it went to sleep and was waiting on this completion
// event).
cpu->wakeCPU();
+ cacheDataValid[tid] = true;
DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
tid);
@@ -466,7 +469,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (interruptPending && flags == 0 || switchedOut) {
+ if (interruptPending && flags == 0) {
// Hold off fetch from getting new instructions while an interrupt
// is pending.
return false;
@@ -475,6 +478,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Align the fetch PC so it's at the start of a cache block.
fetch_PC = icacheBlockAlignPC(fetch_PC);
+ // If we've already got the block, no need to try to fetch it again.
+ if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+ return true;
+ }
+
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
memReq[tid] = new MemReq();
@@ -525,6 +533,9 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
MemAccessResult result = icacheInterface->access(memReq[tid]);
+ cacheDataPC[tid] = fetch_PC;
+ cacheDataValid[tid] = false;
+
fetchedCacheLines++;
// If the cache missed, then schedule an event to wake
@@ -1002,8 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
- warn("%lli: Quiesce instruction encountered, halting fetch!",
- curTick);
+// warn("%lli: Quiesce instruction encountered, halting fetch!",
+// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1067,7 +1078,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
- warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
#endif // FULL_SYSTEM
diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh
index eda6a6bc0..d21c573fe 100644
--- a/cpu/o3/iew.hh
+++ b/cpu/o3/iew.hh
@@ -224,6 +224,47 @@ class DefaultIEW
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
+ void incrWb(InstSeqNum &sn)
+ {
+ if (++wbOutstanding == wbMax)
+ ableToIssue = false;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding <= wbMax);
+#ifdef DEBUG
+ wbList.insert(sn);
+#endif
+ }
+
+ void decrWb(InstSeqNum &sn)
+ {
+ if (wbOutstanding-- == wbMax)
+ ableToIssue = true;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding >= 0);
+#ifdef DEBUG
+ assert(wbList.find(sn) != wbList.end());
+ wbList.erase(sn);
+#endif
+ }
+
+#ifdef DEBUG
+ std::set<InstSeqNum> wbList;
+
+ void dumpWb()
+ {
+ std::set<InstSeqNum>::iterator wb_it = wbList.begin();
+ while (wb_it != wbList.end()) {
+ cprintf("[sn:%lli]\n",
+ (*wb_it));
+ wb_it++;
+ }
+ }
+#endif
+
+ bool canIssue() { return ableToIssue; }
+
+ bool ableToIssue;
+
private:
/** Sends commit proper information for a squash due to a branch
* mispredict.
@@ -281,6 +322,9 @@ class DefaultIEW
/** Processes inputs and changes state accordingly. */
void checkSignalsAndUpdate(unsigned tid);
+ /** Removes instructions from rename from a thread's instruction list. */
+ void emptyRenameInsts(unsigned tid);
+
/** Sorts instructions coming from rename into lists separated by thread. */
void sortInsts();
@@ -401,20 +445,12 @@ class DefaultIEW
*/
unsigned issueToExecuteDelay;
- /** Width of issue's read path, in instructions. The read path is both
- * the skid buffer and the rename instruction queue.
- * Note to self: is this really different than issueWidth?
- */
- unsigned issueReadWidth;
+ /** Width of dispatch, in instructions. */
+ unsigned dispatchWidth;
/** Width of issue, in instructions. */
unsigned issueWidth;
- /** Width of execute, in instructions. Might make more sense to break
- * down into FP vs int.
- */
- unsigned executeWidth;
-
/** Index into queue of instructions being written back. */
unsigned wbNumInst;
@@ -425,6 +461,17 @@ class DefaultIEW
*/
unsigned wbCycle;
+ /** Number of instructions in flight that will writeback. */
+ int wbOutstanding;
+
+ /** Writeback width. */
+ unsigned wbWidth;
+
+ /** Writeback width * writeback depth, where writeback depth is
+ * the number of cycles of writing back instructions that can be
+ * buffered. */
+ unsigned wbMax;
+
/** Number of active threads. */
unsigned numThreads;
@@ -459,14 +506,6 @@ class DefaultIEW
Stats::Scalar<> iewIQFullEvents;
/** Stat for number of times the LSQ becomes full. */
Stats::Scalar<> iewLSQFullEvents;
- /** Stat for total number of executed instructions. */
- Stats::Scalar<> iewExecutedInsts;
- /** Stat for total number of executed load instructions. */
- Stats::Vector<> iewExecLoadInsts;
- /** Stat for total number of executed store instructions. */
-// Stats::Scalar<> iewExecStoreInsts;
- /** Stat for total number of squashed instructions skipped at execute. */
- Stats::Scalar<> iewExecSquashedInsts;
/** Stat for total number of memory ordering violation events. */
Stats::Scalar<> memOrderViolationEvents;
/** Stat for total number of incorrect predicted taken branches. */
@@ -476,28 +515,27 @@ class DefaultIEW
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
+ /** Stat for total number of executed instructions. */
+ Stats::Scalar<> iewExecutedInsts;
+ /** Stat for total number of executed load instructions. */
+ Stats::Vector<> iewExecLoadInsts;
+ /** Stat for total number of executed store instructions. */
+// Stats::Scalar<> iewExecStoreInsts;
+ /** Stat for total number of squashed instructions skipped at execute. */
+ Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */
- Stats::Vector<> exeSwp;
+ Stats::Vector<> iewExecutedSwp;
/** Number of executed nops. */
- Stats::Vector<> exeNop;
+ Stats::Vector<> iewExecutedNop;
/** Number of executed meomory references. */
- Stats::Vector<> exeRefs;
+ Stats::Vector<> iewExecutedRefs;
/** Number of executed branches. */
- Stats::Vector<> exeBranches;
-
-// Stats::Vector<> issued_ops;
-/*
- Stats::Vector<> stat_fu_busy;
- Stats::Vector2d<> stat_fuBusy;
- Stats::Vector<> dist_unissued;
- Stats::Vector2d<> stat_issued_inst_type;
-*/
- /** Number of instructions issued per cycle. */
- Stats::Formula issueRate;
+ Stats::Vector<> iewExecutedBranches;
/** Number of executed store instructions. */
Stats::Formula iewExecStoreInsts;
-// Stats::Formula issue_op_rate;
-// Stats::Formula fu_busy_rate;
+ /** Number of instructions executed per cycle. */
+ Stats::Formula iewExecRate;
+
/** Number of instructions sent to commit. */
Stats::Vector<> iewInstsToCommit;
/** Number of instructions that writeback. */
@@ -510,7 +548,6 @@ class DefaultIEW
* to resource contention.
*/
Stats::Vector<> wbPenalized;
-
/** Number of instructions per cycle written back. */
Stats::Formula wbRate;
/** Average number of woken instructions per writeback. */
diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh
index 3ed20cb75..102be4f8d 100644
--- a/cpu/o3/iew_impl.hh
+++ b/cpu/o3/iew_impl.hh
@@ -56,9 +56,11 @@ DefaultIEW<Impl>::LdWritebackEvent::process()
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
if (iewStage->isSwitchedOut()) {
+ iewStage->decrWb(inst->seqNum);
inst = NULL;
return;
} else if (inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
iewStage->wakeCPU();
inst = NULL;
return;
@@ -93,16 +95,17 @@ DefaultIEW<Impl>::LdWritebackEvent::description()
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
: // @todo: Make this into a parameter.
- issueToExecQueue(5, 5),
+ issueToExecQueue(params->backComSize, params->forwardComSize),
instQueue(params),
ldstQueue(params),
fuPool(params->fuPool),
commitToIEWDelay(params->commitToIEWDelay),
renameToIEWDelay(params->renameToIEWDelay),
issueToExecuteDelay(params->issueToExecuteDelay),
- issueReadWidth(params->issueWidth),
+ dispatchWidth(params->dispatchWidth),
issueWidth(params->issueWidth),
- executeWidth(params->executeWidth),
+ wbOutstanding(0),
+ wbWidth(params->wbWidth),
numThreads(params->numberOfThreads),
switchedOut(false)
{
@@ -125,8 +128,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
fetchRedirect[i] = false;
}
+ wbMax = wbWidth * params->wbDepth;
+
updateLSQNextCycle = false;
+ ableToIssue = true;
+
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
}
@@ -144,6 +151,7 @@ DefaultIEW<Impl>::regStats()
using namespace Stats;
instQueue.regStats();
+ ldstQueue.regStats();
iewIdleCycles
.name(name() + ".iewIdleCycles")
@@ -189,20 +197,6 @@ DefaultIEW<Impl>::regStats()
.name(name() + ".iewLSQFullEvents")
.desc("Number of times the LSQ has become full, causing a stall");
- iewExecutedInsts
- .name(name() + ".iewExecutedInsts")
- .desc("Number of executed instructions");
-
- iewExecLoadInsts
- .init(cpu->number_of_threads)
- .name(name() + ".iewExecLoadInsts")
- .desc("Number of load instructions executed")
- .flags(total);
-
- iewExecSquashedInsts
- .name(name() + ".iewExecSquashedInsts")
- .desc("Number of squashed instructions skipped in execute");
-
memOrderViolationEvents
.name(name() + ".memOrderViolationEvents")
.desc("Number of memory order violations");
@@ -221,47 +215,49 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
- exeSwp
+ iewExecutedInsts
+ .name(name() + ".iewExecutedInsts")
+ .desc("Number of executed instructions");
+
+ iewExecLoadInsts
+ .init(cpu->number_of_threads)
+ .name(name() + ".iewExecLoadInsts")
+ .desc("Number of load instructions executed")
+ .flags(total);
+
+ iewExecSquashedInsts
+ .name(name() + ".iewExecSquashedInsts")
+ .desc("Number of squashed instructions skipped in execute");
+
+ iewExecutedSwp
.init(cpu->number_of_threads)
.name(name() + ".EXEC:swp")
.desc("number of swp insts executed")
- .flags(total)
- ;
+ .flags(total);
- exeNop
+ iewExecutedNop
.init(cpu->number_of_threads)
.name(name() + ".EXEC:nop")
.desc("number of nop insts executed")
- .flags(total)
- ;
+ .flags(total);
- exeRefs
+ iewExecutedRefs
.init(cpu->number_of_threads)
.name(name() + ".EXEC:refs")
.desc("number of memory reference insts executed")
- .flags(total)
- ;
+ .flags(total);
- exeBranches
+ iewExecutedBranches
.init(cpu->number_of_threads)
.name(name() + ".EXEC:branches")
.desc("Number of branches executed")
- .flags(total)
- ;
-
- issueRate
- .name(name() + ".EXEC:rate")
- .desc("Inst execution rate")
- .flags(total)
- ;
- issueRate = iewExecutedInsts / cpu->numCycles;
+ .flags(total);
iewExecStoreInsts
.name(name() + ".EXEC:stores")
.desc("Number of stores executed")
- .flags(total)
- ;
- iewExecStoreInsts = exeRefs - iewExecLoadInsts;
+ .flags(total);
+ iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts;
/*
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
@@ -277,58 +273,50 @@ DefaultIEW<Impl>::regStats()
.init(cpu->number_of_threads)
.name(name() + ".WB:sent")
.desc("cumulative count of insts sent to commit")
- .flags(total)
- ;
+ .flags(total);
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
- .flags(total)
- ;
+ .flags(total);
producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
- .flags(total)
- ;
+ .flags(total);
consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
- .flags(total)
- ;
+ .flags(total);
wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
- .flags(total)
- ;
+ .flags(total);
wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
- .flags(total)
- ;
+ .flags(total);
wbPenalizedRate = wbPenalized / writebackCount;
wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
- .flags(total)
- ;
+ .flags(total);
wbFanout = producerInst / consumerInst;
wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
- .flags(total)
- ;
+ .flags(total);
wbRate = writebackCount / cpu->numCycles;
}
@@ -481,8 +469,7 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
- // @todo: Fix hardcoded number
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
}
@@ -515,16 +502,7 @@ DefaultIEW<Impl>::squash(unsigned tid)
skidBuffer[tid].pop();
}
- while (!insts[tid].empty()) {
- if (insts[tid].front()->isLoad() ||
- insts[tid].front()->isStore() ) {
- toRename->iewInfo[tid].dispatchedToLSQ++;
- }
-
- toRename->iewInfo[tid].dispatched++;
-
- insts[tid].pop();
- }
+ emptyRenameInsts(tid);
}
template<class Impl>
@@ -650,14 +628,16 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
// free slot.
while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
++wbNumInst;
- if (wbNumInst == issueWidth) {
+ if (wbNumInst == wbWidth) {
++wbCycle;
wbNumInst = 0;
}
- assert(wbCycle < 5);
+ assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
+ DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
+ wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@@ -670,7 +650,7 @@ DefaultIEW<Impl>::validInstsFromRename()
unsigned inst_count = 0;
for (int i=0; i<fromRename->size; i++) {
- if (!fromRename->insts[i]->squashed)
+ if (!fromRename->insts[i]->isSquashed())
inst_count++;
}
@@ -858,10 +838,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid)
}
if (fromCommit->commitInfo[tid].robSquashing) {
- DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n");
+ DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid);
dispatchStatus[tid] = Squashing;
+ emptyRenameInsts(tid);
+ wroteToTimeBuffer = true;
return;
}
@@ -912,6 +894,22 @@ DefaultIEW<Impl>::sortInsts()
template <class Impl>
void
+DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
+{
+ while (!insts[tid].empty()) {
+ if (insts[tid].front()->isLoad() ||
+ insts[tid].front()->isStore() ) {
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ }
+
+ toRename->iewInfo[tid].dispatched++;
+
+ insts[tid].pop();
+ }
+}
+
+template <class Impl>
+void
DefaultIEW<Impl>::wakeCPU()
{
cpu->wakeCPU();
@@ -1010,7 +1008,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
// Loop through the instructions, putting them in the instruction
// queue.
for ( ; dis_num_inst < insts_to_add &&
- dis_num_inst < issueReadWidth;
+ dis_num_inst < dispatchWidth;
++dis_num_inst)
{
inst = insts_to_dispatch.front();
@@ -1149,7 +1147,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
instQueue.recordProducer(inst);
- exeNop[tid]++;
+ iewExecutedNop[tid]++;
add_to_iq = false;
} else if (inst->isExecuted()) {
@@ -1263,6 +1261,7 @@ DefaultIEW<Impl>::executeInsts()
++iewExecSquashedInsts;
+ decrWb(inst->seqNum);
continue;
}
@@ -1399,8 +1398,8 @@ DefaultIEW<Impl>::writebackInsts()
DynInstPtr inst = toCommit->insts[inst_num];
int tid = inst->threadNumber;
- DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
- inst->readPC());
+ DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
iewInstsToCommit[tid]++;
@@ -1425,6 +1424,8 @@ DefaultIEW<Impl>::writebackInsts()
}
writebackCount[tid]++;
}
+
+ decrWb(inst->seqNum);
}
}
@@ -1561,7 +1562,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
- exeSwp[thread_number]++;
+ iewExecutedSwp[thread_number]++;
else
iewExecutedInsts++;
#else
@@ -1572,13 +1573,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
// Control operations
//
if (inst->isControl())
- exeBranches[thread_number]++;
+ iewExecutedBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
- exeRefs[thread_number]++;
+ iewExecutedRefs[thread_number]++;
if (inst->isLoad()) {
iewExecLoadInsts[thread_number]++;
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh
index 4802cbaf4..80cd71f0d 100644
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -490,8 +490,6 @@ class InstructionQueue
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
-// Stats::Formula issue_stores;
-// Stats::Formula issue_op_rate;
/** Number of times the FU was busy. */
Stats::Vector<> fuBusy;
/** Number of times the FU was busy per instruction issued. */
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh
index d677a259c..72cb0d708 100644
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -288,22 +288,7 @@ InstructionQueue<Impl>::regStats()
.flags(total)
;
issueRate = iqInstsIssued / cpu->numCycles;
-/*
- issue_stores
- .name(name() + ".ISSUE:stores")
- .desc("Number of stores issued")
- .flags(total)
- ;
- issue_stores = exe_refs - exe_loads;
-*/
-/*
- issue_op_rate
- .name(name() + ".ISSUE:op_rate")
- .desc("Operation issue rate")
- .flags(total)
- ;
- issue_op_rate = issued_ops / numCycles;
-*/
+
statFuBusy
.init(Num_OpClasses)
.name(name() + ".ISSUE:fu_full")
@@ -700,6 +685,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int total_issued = 0;
while (total_issued < totalWidth &&
+ iewStage->canIssue() &&
order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
@@ -790,13 +776,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// complete.
++freeEntries;
count[tid]--;
- issuing_inst->removeInIQ();
+ issuing_inst->clearInIQ();
} else {
memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
statIssuedInstType[tid][op_class]++;
+ iewStage->incrWb(issuing_inst->seqNum);
} else {
statFuBusy[op_class]++;
fuBusy[tid]++;
@@ -1096,7 +1083,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
// inst will flow through the rest of the pipeline.
squashed_inst->setIssued();
squashed_inst->setCanCommit();
- squashed_inst->removeInIQ();
+ squashed_inst->clearInIQ();
//Update Thread IQ Count
count[squashed_inst->threadNumber]--;
diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh
index b321d4590..c67225bc0 100644
--- a/cpu/o3/lsq.hh
+++ b/cpu/o3/lsq.hh
@@ -62,6 +62,9 @@ class LSQ {
/** Returns the name of the LSQ. */
std::string name() const;
+ /** Registers the statistics for each LSQ Unit. */
+ void regStats();
+
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Sets the CPU pointer. */
diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh
index a6ad27522..a8a55af1a 100644
--- a/cpu/o3/lsq_impl.hh
+++ b/cpu/o3/lsq_impl.hh
@@ -106,6 +106,16 @@ LSQ<Impl>::name() const
template<class Impl>
void
+LSQ<Impl>::regStats()
+{
+ //Initialize LSQs
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].regStats();
+ }
+}
+
+template<class Impl>
+void
LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
{
activeThreads = at_ptr;
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh
index a6afff743..fe174a97d 100644
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -101,6 +101,9 @@ class LSQUnit {
/** Returns the name of the LSQ unit. */
std::string name() const;
+ /** Registers statistics. */
+ void regStats();
+
/** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
@@ -153,9 +156,6 @@ class LSQUnit {
/** Writes back stores. */
void writebackStores();
- // @todo: Include stats in the LSQ unit.
- //void regStats();
-
/** Clears all the entries in the LQ. */
void clearLQ();
@@ -369,25 +369,34 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
-/*
- // total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
+ /** Total number of loads forwaded from LSQ stores. */
+ Stats::Scalar<> lsqForwLoads;
+
+ /** Total number of loads ignored due to invalid addresses. */
+ Stats::Scalar<> invAddrLoads;
+
+ /** Total number of squashed loads. */
+ Stats::Scalar<> lsqSquashedLoads;
- // total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
+ /** Total number of responses from the memory system that are
+ * ignored due to the instruction already being squashed. */
+ Stats::Scalar<> lsqIgnoredResponses;
- // total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
+ /** Total number of squashed stores. */
+ Stats::Scalar<> lsqSquashedStores;
- // total non-speculative bogus addresses seen (debug var)
- Counter sim_invalid_addrs;
- Stats::Vector<> fu_busy; //cumulative fu busy
+ /** Total number of software prefetches ignored due to invalid addresses. */
+ Stats::Scalar<> invAddrSwpfs;
- // ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
+ /** Ready loads blocked due to partial store-forwarding. */
+ Stats::Scalar<> lsqBlockedLoads;
+
+ /** Number of loads that were rescheduled. */
+ Stats::Scalar<> lsqRescheduledLoads;
+
+ /** Number of times the LSQ is blocked due to the cache. */
+ Stats::Scalar<> lsqCacheBlocked;
- Stats::Scalar<> lsqInversion;
-*/
public:
/** Executes the load at the given index. */
template <class T>
@@ -441,8 +450,9 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
if (req->flags & UNCACHEABLE &&
- (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
+ (load_idx != loadHead || !loadQueue[load_idx]->isAtCommit())) {
iewStage->rescheduleMemInst(loadQueue[load_idx]);
+ ++lsqRescheduledLoads;
return TheISA::genMachineCheckFault();
}
@@ -552,6 +562,8 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(loadQueue[load_idx]);
+ iewStage->decrWb(loadQueue[load_idx]->seqNum);
+ ++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
// complete.
@@ -559,6 +571,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
"Store idx %i to load addr %#x\n",
store_idx, req->vaddr);
+ ++lsqBlockedLoads;
return NoFault;
}
}
@@ -579,6 +592,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// if we have a cache, do cache access too
if (fault == NoFault && dcacheInterface) {
if (dcacheInterface->isBlocked()) {
+ ++lsqCacheBlocked;
+
+ iewStage->decrWb(inst->seqNum);
+
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
return NoFault;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh
index 4ee8bb234..5cc3078f8 100644
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -126,6 +126,47 @@ LSQUnit<Impl>::name() const
template<class Impl>
void
+LSQUnit<Impl>::regStats()
+{
+ lsqForwLoads
+ .name(name() + ".forwLoads")
+ .desc("Number of loads that had data forwarded from stores");
+
+ invAddrLoads
+ .name(name() + ".invAddrLoads")
+ .desc("Number of loads ignored due to an invalid address");
+
+ lsqSquashedLoads
+ .name(name() + ".squashedLoads")
+ .desc("Number of loads squashed");
+
+ lsqIgnoredResponses
+ .name(name() + ".ignoredResponses")
+ .desc("Number of memory responses ignored because the instruction is squashed");
+
+ lsqSquashedStores
+ .name(name() + ".squashedStores")
+ .desc("Number of stores squashed");
+
+ invAddrSwpfs
+ .name(name() + ".invAddrSwpfs")
+ .desc("Number of software prefetches ignored due to an invalid address");
+
+ lsqBlockedLoads
+ .name(name() + ".blockedLoads")
+ .desc("Number of blocked loads due to partial load-store forwarding");
+
+ lsqRescheduledLoads
+ .name(name() + ".rescheduledLoads")
+ .desc("Number of loads that were rescheduled");
+
+ lsqCacheBlocked
+ .name(name() + ".cacheBlocked")
+ .desc("Number of times an access to memory failed due to the cache being blocked");
+}
+
+template<class Impl>
+void
LSQUnit<Impl>::clearLQ()
{
loadQueue.clear();
@@ -548,6 +589,7 @@ LSQUnit<Impl>::writebackStores()
if (dcacheInterface && dcacheInterface->isBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
+ ++lsqCacheBlocked;
break;
}
@@ -705,7 +747,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
// Clear the smart pointer to make sure it is decremented.
- loadQueue[load_idx]->squashed = true;
+ loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
--loads;
@@ -748,7 +790,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
// Clear the smart pointer to make sure it is decremented.
- storeQueue[store_idx].inst->squashed = true;
+ storeQueue[store_idx].inst->setSquashed();
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
@@ -765,6 +807,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
storeTail = store_idx;
decrStIdx(store_idx);
+ ++lsqSquashedStores;
}
}
diff --git a/cpu/o3/mem_dep_unit.cc b/cpu/o3/mem_dep_unit.cc
index ccdd1a515..b0f91d44f 100644
--- a/cpu/o3/mem_dep_unit.cc
+++ b/cpu/o3/mem_dep_unit.cc
@@ -35,6 +35,7 @@
// AlphaSimpleImpl.
template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
+#ifdef DEBUG
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
@@ -44,3 +45,4 @@ MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
+#endif
diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh
index 595e9293f..bfe694bd8 100644
--- a/cpu/o3/mem_dep_unit_impl.hh
+++ b/cpu/o3/mem_dep_unit_impl.hh
@@ -59,7 +59,9 @@ MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
}
}
+#ifdef DEBUG
assert(MemDepEntry::memdep_count == 0);
+#endif
}
template <class MemDepPred, class Impl>
@@ -141,7 +143,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
instList[tid].push_back(inst);
@@ -227,7 +231,9 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
// Insert the MemDepEntry into the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
// Add the instruction to the list.
instList[tid].push_back(inst);
@@ -275,7 +281,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
// Add the instruction to the instruction list.
instList[tid].push_back(barr_inst);
@@ -375,7 +383,9 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
+#ifdef DEBUG
MemDepEntry::memdep_erase++;
+#endif
}
template <class MemDepPred, class Impl>
@@ -470,7 +480,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
+#ifdef DEBUG
MemDepEntry::memdep_erase++;
+#endif
instList[tid].erase(squash_it--);
}
@@ -551,5 +563,7 @@ MemDepUnit<MemDepPred, Impl>::dumpLists()
cprintf("Memory dependence hash size: %i\n", memDepHash.size());
+#ifdef DEBUG
cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
+#endif
}
diff --git a/cpu/o3/regfile.hh b/cpu/o3/regfile.hh
index ed1238d36..76c43d3a1 100644
--- a/cpu/o3/regfile.hh
+++ b/cpu/o3/regfile.hh
@@ -223,10 +223,10 @@ class PhysRegFile
public:
/** (signed) integer register file. */
- std::vector<IntReg> intRegFile;
+ IntReg *intRegFile;
/** Floating point register file. */
- std::vector<FloatReg> floatRegFile;
+ FloatReg *floatRegFile;
/** Miscellaneous register file. */
MiscRegFile miscRegs[Impl::MaxThreads];
@@ -256,11 +256,15 @@ PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
: numPhysicalIntRegs(_numPhysicalIntRegs),
numPhysicalFloatRegs(_numPhysicalFloatRegs)
{
- intRegFile.resize(numPhysicalIntRegs);
- floatRegFile.resize(numPhysicalFloatRegs);
+ intRegFile = new IntReg[numPhysicalIntRegs];
+ floatRegFile = new FloatReg[numPhysicalFloatRegs];
- //memset(intRegFile, 0, sizeof(*intRegFile));
- //memset(floatRegFile, 0, sizeof(*floatRegFile));
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ miscRegs[i].clear();
+ }
+
+ memset(intRegFile, 0, sizeof(*intRegFile));
+ memset(floatRegFile, 0, sizeof(*floatRegFile));
}
#endif
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index 829c99584..93f5b3504 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -348,7 +348,7 @@ DefaultRename<Impl>::squash(unsigned tid)
for (int i=0; i<fromDecode->size; i++) {
if (fromDecode->insts[i]->threadNumber == tid) {
- fromDecode->insts[i]->squashed = true;
+ fromDecode->insts[i]->setSquashed();
wroteToTimeBuffer = true;
squashCount++;
}
@@ -1029,7 +1029,7 @@ DefaultRename<Impl>::validInsts()
unsigned inst_count = 0;
for (int i=0; i<fromDecode->size; i++) {
- if (!fromDecode->insts[i]->squashed)
+ if (!fromDecode->insts[i]->isSquashed())
inst_count++;
}
diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh
index bdbdde32f..2043e0b34 100644
--- a/cpu/o3/rob.hh
+++ b/cpu/o3/rob.hh
@@ -305,7 +305,7 @@ class ROB
private:
/** The sequence number of the squashed instruction. */
- InstSeqNum squashedSeqNum;
+ InstSeqNum squashedSeqNum[Impl::MaxThreads];
/** Is the ROB done squashing. */
bool doneSquashing[Impl::MaxThreads];
diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh
index 25e0c80fd..62c4d9cf7 100644
--- a/cpu/o3/rob_impl.hh
+++ b/cpu/o3/rob_impl.hh
@@ -38,10 +38,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
: numEntries(_numEntries),
squashWidth(_squashWidth),
numInstsInROB(0),
- squashedSeqNum(0),
numThreads(_numThreads)
{
for (int tid=0; tid < numThreads; tid++) {
+ squashedSeqNum[tid] = 0;
doneSquashing[tid] = true;
threadEntries[tid] = 0;
}
@@ -274,7 +274,7 @@ ROB<Impl>::retireHead(unsigned tid)
--numInstsInROB;
--threadEntries[tid];
- head_inst->removeInROB();
+ head_inst->clearInROB();
head_inst->setCommitted();
instList[tid].erase(head_it);
@@ -349,11 +349,11 @@ void
ROB<Impl>::doSquash(unsigned tid)
{
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
- tid, squashedSeqNum);
+ tid, squashedSeqNum[tid]);
assert(squashIt[tid] != instList[tid].end());
- if ((*squashIt[tid])->seqNum < squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -368,7 +368,7 @@ ROB<Impl>::doSquash(unsigned tid)
for (int numSquashed = 0;
numSquashed < squashWidth &&
squashIt[tid] != instList[tid].end() &&
- (*squashIt[tid])->seqNum > squashedSeqNum;
+ (*squashIt[tid])->seqNum > squashedSeqNum[tid];
++numSquashed)
{
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
@@ -405,7 +405,7 @@ ROB<Impl>::doSquash(unsigned tid)
// Check if ROB is done squashing.
- if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -517,7 +517,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doneSquashing[tid] = false;
- squashedSeqNum = squash_num;
+ squashedSeqNum[tid] = squash_num;
if (!instList[tid].empty()) {
InstIt tail_thread = instList[tid].end();
diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 5af2b02b2..c272528b1 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,13 +79,13 @@ template <class>
class Checker;
/**
- * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
- * simple out-of-order capabilities added to it. It is still a 1 CPI machine
- * (?), but is capable of handling cache misses. Basically it models having
- * a ROB/IQ by only allowing a certain amount of instructions to execute while
- * the cache miss is outstanding.
+ * Light weight out of order CPU model that approximates an out of
+ * order CPU. It is separated into a front end and a back end, with
+ * the template parameter Impl describing the classes used for each.
+ * The goal is to be able to specify through the Impl the class to use
+ * for the front end and back end, with different classes used to
+ * model different levels of detail.
*/
-
template <class Impl>
class OzoneCPU : public BaseCPU
{
@@ -98,6 +98,11 @@ class OzoneCPU : public BaseCPU
typedef TheISA::MiscReg MiscReg;
public:
+ /**
+ * The ExecContext for this CPU, which is used to provide the
+ * CPU's interface to any external objects. Internally most of
+ * the CPU state is stored within the OzoneThreadState class.
+ */
class OzoneXC : public ExecContext {
public:
OzoneCPU<Impl> *cpu;
@@ -235,14 +240,19 @@ class OzoneCPU : public BaseCPU
#endif
};
- // execution context proxy
+ // ExecContext for OzoneCPU
OzoneXC ozoneXC;
+
+ // ExecContext pointer that will be given to any external objects.
ExecContext *xcProxy;
+
+ // ExecContext pointer to the CheckerCPU's ExecContext.
ExecContext *checkerXC;
typedef OzoneThreadState<Impl> ImplState;
private:
+ // Committed thread state for the OzoneCPU.
OzoneThreadState<Impl> thread;
public:
@@ -280,12 +290,6 @@ class OzoneCPU : public BaseCPU
tickEvent.squash();
}
- private:
- Trace::InstRecord *traceData;
-
- template<typename T>
- void trace_data(T data);
-
public:
enum Status {
Running,
@@ -361,6 +365,7 @@ class OzoneCPU : public BaseCPU
FrontEnd *frontEnd;
BackEnd *backEnd;
+
private:
Status status() const { return _status; }
void setStatus(Status new_status) { _status = new_status; }
@@ -392,12 +397,11 @@ class OzoneCPU : public BaseCPU
// number of idle cycles
Stats::Average<> notIdleFraction;
Stats::Formula idleFraction;
- public:
+ public:
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
-
#if FULL_SYSTEM
bool validInstAddr(Addr addr) { return true; }
bool validDataAddr(Addr addr) { return true; }
@@ -585,12 +589,9 @@ class OzoneCPU : public BaseCPU
Fault copy(Addr dest);
- InstSeqNum globalSeqNum;
-
public:
void squashFromXC();
- // @todo: This can be a useful debug function. Implement it.
void dumpInsts() { frontEnd->dumpInsts(); }
#if FULL_SYSTEM
@@ -608,7 +609,6 @@ class OzoneCPU : public BaseCPU
ExecContext *xcBase() { return xcProxy; }
- bool decoupledFrontEnd;
struct CommStruct {
InstSeqNum doneSeqNum;
InstSeqNum nonSpecSeqNum;
@@ -617,8 +617,13 @@ class OzoneCPU : public BaseCPU
bool stall;
};
+
+ InstSeqNum globalSeqNum;
+
TimeBuffer<CommStruct> comm;
+ bool decoupledFrontEnd;
+
bool lockFlag;
Stats::Scalar<> quiesceCycles;
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 5675da3a8..4f41f220a 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -26,9 +26,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-//#include <cstdio>
-//#include <cstdlib>
-
#include "arch/isa_traits.hh" // For MachInst
#include "base/trace.hh"
#include "config/full_system.hh"
@@ -39,7 +36,6 @@
#include "cpu/ozone/cpu.hh"
#include "cpu/quiesce_event.hh"
#include "cpu/static_inst.hh"
-//#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "sim/sim_object.hh"
#include "sim/stats.hh"
@@ -50,7 +46,6 @@
#include "arch/alpha/tlb.hh"
#include "arch/vtophys.hh"
#include "base/callback.hh"
-//#include "base/remote_gdb.hh"
#include "cpu/profile.hh"
#include "kern/kernel_stats.hh"
#include "mem/functional/memory_control.hh"
@@ -67,15 +62,6 @@
using namespace TheISA;
template <class Impl>
-template<typename T>
-void
-OzoneCPU<Impl>::trace_data(T data) {
- if (traceData) {
- traceData->setData(data);
- }
-}
-
-template <class Impl>
OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
: Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
{
@@ -104,7 +90,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
: BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
mem(p->workload[0]->getMemory()),
#endif
- comm(5, 5)
+ comm(5, 5), decoupledFrontEnd(p->decoupledFrontEnd)
{
frontEnd = new FrontEnd(p);
backEnd = new BackEnd(p);
@@ -112,6 +98,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
_status = Idle;
if (p->checker) {
+ // If checker is being used, get the checker from the params
+ // pointer, make the Checker's ExecContext, and setup the
+ // xcProxy to point to it.
BaseCPU *temp_checker = p->checker;
checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
checker->setMemory(mem);
@@ -122,11 +111,17 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.xcProxy = checkerXC;
xcProxy = checkerXC;
} else {
+ // If checker is not being used, then the xcProxy points
+ // directly to the CPU's ExecContext.
checker = NULL;
thread.xcProxy = &ozoneXC;
xcProxy = &ozoneXC;
}
+ // Add xcProxy to CPU list of ExecContexts.
+ execContexts.push_back(xcProxy);
+
+ // Give the OzoneXC pointers to the CPU and the thread state.
ozoneXC.cpu = this;
ozoneXC.thread = &thread;
@@ -134,7 +129,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.setStatus(ExecContext::Suspended);
#if FULL_SYSTEM
- /***** All thread state stuff *****/
+ // Setup thread state stuff.
thread.cpu = this;
thread.tid = 0;
thread.mem = p->mem;
@@ -171,8 +166,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
numInst = 0;
startNumInst = 0;
- execContexts.push_back(xcProxy);
-
+ // Give pointers to the front and back end to all things they may need.
frontEnd->setCPU(this);
backEnd->setCPU(this);
@@ -188,12 +182,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
frontEnd->setBackEnd(backEnd);
backEnd->setFrontEnd(frontEnd);
- decoupledFrontEnd = p->decoupledFrontEnd;
-
globalSeqNum = 1;
checkInterrupts = false;
+ lockFlag = 0;
+
+ // Setup rename table, initializing all values to ready.
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
thread.renameTable[i] = new DynInst(this);
thread.renameTable[i]->setResultReady();
@@ -206,8 +201,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
// pTable = p->pTable;
#endif
- lockFlag = 0;
-
DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
}
@@ -231,14 +224,20 @@ template <class Impl>
void
OzoneCPU<Impl>::signalSwitched()
{
+ // Only complete the switchout when both the front end and back
+ // end have signalled they are ready to switch.
if (++switchCount == 2) {
backEnd->doSwitchOut();
frontEnd->doSwitchOut();
+
if (checker)
checker->switchOut(sampler);
+
_status = SwitchedOut;
+
if (tickEvent.scheduled())
tickEvent.squash();
+
sampler->signalSwitched();
}
assert(switchCount <= 2);
@@ -793,6 +792,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
thread->quiesceEvent->xc = this;
}
+ // Copy kernel stats pointer from old context.
thread->kernelStats = old_context->getKernelStats();
// storeCondFailures = 0;
cpu->lockFlag = false;
@@ -814,7 +814,11 @@ OzoneCPU<Impl>::OzoneXC::regStats(const std::string &name)
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::serialize(std::ostream &os)
-{ }
+{
+ // Once serialization is added, serialize the quiesce event and
+ // kernel stats. Will need to make sure there aren't multiple
+ // things that serialize them.
+}
template <class Impl>
void
@@ -867,7 +871,6 @@ OzoneCPU<Impl>::OzoneXC::getThreadNum()
return thread->tid;
}
-// Also somewhat obnoxious. Really only used for the TLB fault.
template <class Impl>
TheISA::MachInst
OzoneCPU<Impl>::OzoneXC::getInst()
@@ -901,7 +904,7 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
// Need to copy the XC values into the current rename table,
// copy the misc regs.
- thread->regs.miscRegs.copyMiscRegs(xc);
+ TheISA::copyMiscRegs(xc, this);
}
template <class Impl>
diff --git a/cpu/ozone/inorder_back_end_impl.hh b/cpu/ozone/inorder_back_end_impl.hh
index 5a378ec76..cc92ec92f 100644
--- a/cpu/ozone/inorder_back_end_impl.hh
+++ b/cpu/ozone/inorder_back_end_impl.hh
@@ -257,7 +257,7 @@ InorderBackEnd<Impl>::executeInsts()
}
inst->setExecuted();
- inst->setCompleted();
+ inst->setResultReady();
inst->setCanCommit();
instList.pop_front();
diff --git a/cpu/ozone/inst_queue_impl.hh b/cpu/ozone/inst_queue_impl.hh
index 0523c68d6..1b9fcdc84 100644
--- a/cpu/ozone/inst_queue_impl.hh
+++ b/cpu/ozone/inst_queue_impl.hh
@@ -848,13 +848,13 @@ template <class Impl>
void
InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
{
- OpClass op_class = ready_inst->opClass();
+// OpClass op_class = ready_inst->opClass();
readyInsts.push(ready_inst);
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
- ready_inst->readPC(), op_class, ready_inst->seqNum);
+ ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum);
}
/*
template <class Impl>
@@ -1175,11 +1175,11 @@ InstQueue<Impl>::addIfReady(DynInstPtr &inst)
return;
}
- OpClass op_class = inst->opClass();
+// OpClass op_class = inst->opClass();
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
- inst->readPC(), op_class, inst->seqNum);
+ inst->readPC(), inst->opClass(), inst->seqNum);
readyInsts.push(inst);
}
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index 1c03ffb73..19f2b2b61 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -369,37 +369,37 @@ class LWBackEnd
/* Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
*/
- Stats::Vector<> rob_cap_events;
- Stats::Vector<> rob_cap_inst_count;
- Stats::Vector<> iq_cap_events;
- Stats::Vector<> iq_cap_inst_count;
+ Stats::Vector<> robCapEvents;
+ Stats::Vector<> robCapInstCount;
+ Stats::Vector<> iqCapEvents;
+ Stats::Vector<> iqCapInstCount;
// total number of instructions executed
- Stats::Vector<> exe_inst;
- Stats::Vector<> exe_swp;
- Stats::Vector<> exe_nop;
- Stats::Vector<> exe_refs;
- Stats::Vector<> exe_loads;
- Stats::Vector<> exe_branches;
+ Stats::Vector<> exeInst;
+ Stats::Vector<> exeSwp;
+ Stats::Vector<> exeNop;
+ Stats::Vector<> exeRefs;
+ Stats::Vector<> exeLoads;
+ Stats::Vector<> exeBranches;
- Stats::Vector<> issued_ops;
+ Stats::Vector<> issuedOps;
// total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
+ Stats::Vector<> lsqForwLoads;
// total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
+ Stats::Vector<> invAddrLoads;
// total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
+ Stats::Vector<> invAddrSwpfs;
// ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
+ Stats::Vector<> lsqBlockedLoads;
Stats::Scalar<> lsqInversion;
- Stats::Vector<> n_issued_dist;
- Stats::VectorDistribution<> issue_delay_dist;
+ Stats::Vector<> nIssuedDist;
+ Stats::VectorDistribution<> issueDelayDist;
- Stats::VectorDistribution<> queue_res_dist;
+ Stats::VectorDistribution<> queueResDist;
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
@@ -417,37 +417,37 @@ class LWBackEnd
Stats::Formula commit_ipb;
Stats::Formula lsq_inv_rate;
*/
- Stats::Vector<> writeback_count;
- Stats::Vector<> producer_inst;
- Stats::Vector<> consumer_inst;
- Stats::Vector<> wb_penalized;
+ Stats::Vector<> writebackCount;
+ Stats::Vector<> producerInst;
+ Stats::Vector<> consumerInst;
+ Stats::Vector<> wbPenalized;
- Stats::Formula wb_rate;
- Stats::Formula wb_fanout;
- Stats::Formula wb_penalized_rate;
+ Stats::Formula wbRate;
+ Stats::Formula wbFanout;
+ Stats::Formula wbPenalizedRate;
// total number of instructions committed
- Stats::Vector<> stat_com_inst;
- Stats::Vector<> stat_com_swp;
- Stats::Vector<> stat_com_refs;
- Stats::Vector<> stat_com_loads;
- Stats::Vector<> stat_com_membars;
- Stats::Vector<> stat_com_branches;
+ Stats::Vector<> statComInst;
+ Stats::Vector<> statComSwp;
+ Stats::Vector<> statComRefs;
+ Stats::Vector<> statComLoads;
+ Stats::Vector<> statComMembars;
+ Stats::Vector<> statComBranches;
- Stats::Distribution<> n_committed_dist;
+ Stats::Distribution<> nCommittedDist;
- Stats::Scalar<> commit_eligible_samples;
- Stats::Vector<> commit_eligible;
+ Stats::Scalar<> commitEligibleSamples;
+ Stats::Vector<> commitEligible;
Stats::Vector<> squashedInsts;
Stats::Vector<> ROBSquashedInsts;
- Stats::Scalar<> ROB_fcount;
- Stats::Formula ROB_full_rate;
+ Stats::Scalar<> ROBFcount;
+ Stats::Formula ROBFullRate;
- Stats::Vector<> ROB_count; // cumulative ROB occupancy
- Stats::Formula ROB_occ_rate;
- Stats::VectorDistribution<> ROB_occ_dist;
+ Stats::Vector<> ROBCount; // cumulative ROB occupancy
+ Stats::Formula ROBOccRate;
+ Stats::VectorDistribution<> ROBOccDist;
public:
void dumpInsts();
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index 41b4ea24b..18b2e8f47 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -251,78 +251,77 @@ void
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
- rob_cap_events
+ robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("number of cycles where ROB cap was active")
.flags(total)
;
- rob_cap_inst_count
+ robCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_inst")
.desc("number of instructions held up by ROB cap")
.flags(total)
;
- iq_cap_events
+ iqCapEvents
.init(cpu->number_of_threads)
.name(name() +".IQ:cap_events" )
.desc("number of cycles where IQ cap was active")
.flags(total)
;
- iq_cap_inst_count
+ iqCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".IQ:cap_inst")
.desc("number of instructions held up by IQ cap")
.flags(total)
;
-
- exe_inst
+ exeInst
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:count")
.desc("number of insts issued")
.flags(total)
;
- exe_swp
+ exeSwp
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:swp")
.desc("number of swp insts issued")
.flags(total)
;
- exe_nop
+ exeNop
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:nop")
.desc("number of nop insts issued")
.flags(total)
;
- exe_refs
+ exeRefs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:refs")
.desc("number of memory reference insts issued")
.flags(total)
;
- exe_loads
+ exeLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:loads")
.desc("number of load insts issued")
.flags(total)
;
- exe_branches
+ exeBranches
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:branches")
.desc("Number of branches issued")
.flags(total)
;
- issued_ops
+ issuedOps
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:op_count")
.desc("number of insts issued")
@@ -339,28 +338,28 @@ LWBackEnd<Impl>::regStats()
//
// Other stats
//
- lsq_forw_loads
+ lsqForwLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:forw_loads")
.desc("number of loads forwarded via LSQ")
.flags(total)
;
- inv_addr_loads
+ invAddrLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_loads")
.desc("number of invalid-address loads")
.flags(total)
;
- inv_addr_swpfs
+ invAddrSwpfs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_swpfs")
.desc("number of invalid-address SW prefetches")
.flags(total)
;
- lsq_blocked_loads
+ lsqBlockedLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:blocked_loads")
.desc("number of ready loads not issued due to memory disambiguation")
@@ -372,51 +371,51 @@ LWBackEnd<Impl>::regStats()
.desc("Number of times LSQ instruction issued early")
;
- n_issued_dist
+ nIssuedDist
.init(issueWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
- issue_delay_dist
+ issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
- queue_res_dist
+ queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
- queue_res_dist.subname(i, opClassStrings[i]);
+ queueResDist.subname(i, opClassStrings[i]);
}
- writeback_count
+ writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
- producer_inst
+ producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
- consumer_inst
+ consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
- wb_penalized
+ wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
@@ -424,71 +423,71 @@ LWBackEnd<Impl>::regStats()
;
- wb_penalized_rate
+ wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
- wb_penalized_rate = wb_penalized / writeback_count;
+ wbPenalizedRate = wbPenalized / writebackCount;
- wb_fanout
+ wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
- wb_fanout = producer_inst / consumer_inst;
+ wbFanout = producerInst / consumerInst;
- wb_rate
+ wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
- wb_rate = writeback_count / cpu->numCycles;
+ wbRate = writebackCount / cpu->numCycles;
- stat_com_inst
+ statComInst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
- stat_com_swp
+ statComSwp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
- stat_com_refs
+ statComRefs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
- stat_com_loads
+ statComLoads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
- stat_com_membars
+ statComMembars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
- stat_com_branches
+ statComBranches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
- n_committed_dist
+ nCommittedDist
.init(0,commitWidth,1)
.name(name() + ".COM:committed_per_cycle")
.desc("Number of insts commited each cycle")
@@ -508,14 +507,14 @@ LWBackEnd<Impl>::regStats()
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
- commit_eligible
+ commitEligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
- commit_eligible_samples
+ commitEligibleSamples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
@@ -532,32 +531,32 @@ LWBackEnd<Impl>::regStats()
.desc("Number of instructions removed from inst list when they reached the head of the ROB")
;
- ROB_fcount
+ ROBFcount
.name(name() + ".ROB:full_count")
.desc("number of cycles where ROB was full")
;
- ROB_count
+ ROBCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:occupancy")
.desc(name() + ".ROB occupancy (cumulative)")
.flags(total)
;
- ROB_full_rate
+ ROBFullRate
.name(name() + ".ROB:full_rate")
.desc("ROB full per cycle")
;
- ROB_full_rate = ROB_fcount / cpu->numCycles;
+ ROBFullRate = ROBFcount / cpu->numCycles;
- ROB_occ_rate
+ ROBOccRate
.name(name() + ".ROB:occ_rate")
.desc("ROB occupancy rate")
.flags(total)
;
- ROB_occ_rate = ROB_count / cpu->numCycles;
+ ROBOccRate = ROBCount / cpu->numCycles;
- ROB_occ_dist
+ ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
@@ -660,7 +659,7 @@ LWBackEnd<Impl>::tick()
return;
}
- ROB_count[0]+= numInsts;
+ ROBCount[0]+= numInsts;
wbCycle = 0;
@@ -980,8 +979,8 @@ LWBackEnd<Impl>::executeInsts()
}
}
- issued_ops[0]+= num_executed;
- n_issued_dist[num_executed]++;
+ issuedOps[0]+= num_executed;
+ nIssuedDist[num_executed]++;
}
template<class Impl>
@@ -1002,13 +1001,13 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
- producer_inst[0]++;
- consumer_inst[0]+= dependents;
+ producerInst[0]++;
+ consumerInst[0]+= dependents;
}
}
}
- writeback_count[0]++;
+ writebackCount[0]++;
}
#if 0
template <class Impl>
@@ -1076,7 +1075,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
thread->setPC(inst->readPC());
thread->setNextPC(inst->readNextPC());
- inst->reachedCommit = true;
+ inst->setAtCommit();
// If the instruction is not executed yet, then it is a non-speculative
// or store inst. Signal backwards that it should be executed.
@@ -1229,6 +1228,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
inst->traceData = NULL;
}
+ if (inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
inst->clearDependents();
frontEnd->addFreeRegs(freed_regs);
@@ -1292,7 +1294,7 @@ LWBackEnd<Impl>::commitInsts()
break;
}
}
- n_committed_dist.sample(inst_num);
+ nCommittedDist.sample(inst_num);
}
template <class Impl>
@@ -1344,7 +1346,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
(*insts_it)->setCanCommit();
- (*insts_it)->removeInROB();
+ (*insts_it)->clearInROB();
for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
@@ -1522,27 +1524,27 @@ LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
+ exeSwp[thread_number]++;
else
- exe_inst[thread_number]++;
+ exeInst[thread_number]++;
#else
- exe_inst[thread_number]++;
+ exeInst[thread_number]++;
#endif
//
// Control operations
//
if (inst->isControl())
- exe_branches[thread_number]++;
+ exeBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
- exe_refs[thread_number]++;
+ exeRefs[thread_number]++;
if (inst->isLoad())
- exe_loads[thread_number]++;
+ exeLoads[thread_number]++;
}
}
@@ -1562,33 +1564,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
- stat_com_swp[tid]++;
+ statComSwp[tid]++;
} else {
- stat_com_inst[tid]++;
+ statComInst[tid]++;
}
#else
- stat_com_inst[tid]++;
+ statComInst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
- stat_com_branches[tid]++;
+ statComBranches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
- stat_com_refs[tid]++;
+ statComRefs[tid]++;
if (inst->isLoad()) {
- stat_com_loads[tid]++;
+ statComLoads[tid]++;
}
}
if (inst->isMemBarrier()) {
- stat_com_membars[tid]++;
+ statComMembars[tid]++;
}
}
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
index 6fe343b42..c0bf0b0fe 100644
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -447,7 +447,7 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// too).
// @todo: Fix uncached accesses.
if (req->flags & UNCACHEABLE &&
- (inst != loadQueue.back() || !inst->reachedCommit)) {
+ (inst != loadQueue.back() || !inst->isAtCommit())) {
DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
"commit/LSQ!\n",
inst->seqNum);
diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh
index c86c3a720..f104dff23 100644
--- a/cpu/ozone/thread_state.hh
+++ b/cpu/ozone/thread_state.hh
@@ -182,8 +182,6 @@ struct OzoneThreadState : public ThreadState {
void setNextPC(uint64_t val)
{ nextPC = val; }
- bool misspeculating() { return false; }
-
void setInst(TheISA::MachInst _inst) { inst = _inst; }
Counter readFuncExeInst() { return funcExeInst; }
diff --git a/cpu/thread_state.hh b/cpu/thread_state.hh
index e09cb12fd..12146bd11 100644
--- a/cpu/thread_state.hh
+++ b/cpu/thread_state.hh
@@ -60,6 +60,7 @@ struct ThreadState {
: cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid)
#endif
{
+ numInst = 0;
funcExeInst = 0;
storeCondFailures = 0;
}
diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py
index 043c3c08f..015e9d872 100644
--- a/python/m5/objects/AlphaFullCPU.py
+++ b/python/m5/objects/AlphaFullCPU.py
@@ -39,12 +39,10 @@ class DerivAlphaFullCPU(BaseCPU):
"Issue/Execute/Writeback delay")
issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
"to the IEW stage)")
- issueWidth = Param.Unsigned("Issue width")
- executeWidth = Param.Unsigned("Execute width")
- executeIntWidth = Param.Unsigned("Integer execute width")
- executeFloatWidth = Param.Unsigned("Floating point execute width")
- executeBranchWidth = Param.Unsigned("Branch execute width")
- executeMemoryWidth = Param.Unsigned("Memory execute width")
+ dispatchWidth = Param.Unsigned(8, "Dispatch width")
+ issueWidth = Param.Unsigned(8, "Issue width")
+ wbWidth = Param.Unsigned(8, "Writeback width")
+ wbDepth = Param.Unsigned(1, "Writeback depth")
fuPool = Param.FUPool(NULL, "Functional Unit pool")
iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
@@ -55,6 +53,9 @@ class DerivAlphaFullCPU(BaseCPU):
trapLatency = Param.Tick("Trap latency")
fetchTrapLatency = Param.Tick("Fetch trap latency")
+ backComSize = Param.Unsigned(5, "Time buffer size for backwards communication")
+ forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication")
+
predType = Param.String("Branch predictor type ('local', 'tournament')")
localPredictorSize = Param.Unsigned("Size of local predictor")
localCtrBits = Param.Unsigned("Bits per counter")