summaryrefslogtreecommitdiff
path: root/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'cpu/o3')
-rw-r--r--cpu/o3/alpha_cpu_builder.cc33
-rw-r--r--cpu/o3/alpha_cpu_impl.hh2
-rw-r--r--cpu/o3/alpha_params.hh14
-rw-r--r--cpu/o3/commit.hh13
-rw-r--r--cpu/o3/commit_impl.hh62
-rw-r--r--cpu/o3/cpu.cc66
-rw-r--r--cpu/o3/cpu.hh6
-rw-r--r--cpu/o3/decode_impl.hh9
-rw-r--r--cpu/o3/fetch.hh6
-rw-r--r--cpu/o3/fetch_impl.hh19
-rw-r--r--cpu/o3/iew.hh107
-rw-r--r--cpu/o3/iew_impl.hh157
-rw-r--r--cpu/o3/inst_queue.hh2
-rw-r--r--cpu/o3/inst_queue_impl.hh23
-rw-r--r--cpu/o3/lsq.hh3
-rw-r--r--cpu/o3/lsq_impl.hh10
-rw-r--r--cpu/o3/lsq_unit.hh53
-rw-r--r--cpu/o3/lsq_unit_impl.hh47
-rw-r--r--cpu/o3/mem_dep_unit.cc2
-rw-r--r--cpu/o3/mem_dep_unit_impl.hh14
-rw-r--r--cpu/o3/regfile.hh16
-rw-r--r--cpu/o3/rename_impl.hh4
-rw-r--r--cpu/o3/rob.hh2
-rw-r--r--cpu/o3/rob_impl.hh14
24 files changed, 414 insertions, 270 deletions
diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc
index 08d42cd46..c563fbef3 100644
--- a/cpu/o3/alpha_cpu_builder.cc
+++ b/cpu/o3/alpha_cpu_builder.cc
@@ -94,12 +94,10 @@ Param<unsigned> renameWidth;
Param<unsigned> commitToIEWDelay;
Param<unsigned> renameToIEWDelay;
Param<unsigned> issueToExecuteDelay;
+Param<unsigned> dispatchWidth;
Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-Param<unsigned> executeBranchWidth;
-Param<unsigned> executeMemoryWidth;
+Param<unsigned> wbWidth;
+Param<unsigned> wbDepth;
SimObjectParam<FUPool *> fuPool;
Param<unsigned> iewToCommitDelay;
@@ -109,6 +107,9 @@ Param<unsigned> squashWidth;
Param<Tick> trapLatency;
Param<Tick> fetchTrapLatency;
+Param<unsigned> backComSize;
+Param<unsigned> forwardComSize;
+
Param<std::string> predType;
Param<unsigned> localPredictorSize;
Param<unsigned> localCtrBits;
@@ -219,12 +220,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
"Issue/Execute/Writeback delay"),
INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
"to the IEW stage)"),
+ INIT_PARAM(dispatchWidth, "Dispatch width"),
INIT_PARAM(issueWidth, "Issue width"),
- INIT_PARAM(executeWidth, "Execute width"),
- INIT_PARAM(executeIntWidth, "Integer execute width"),
- INIT_PARAM(executeFloatWidth, "Floating point execute width"),
- INIT_PARAM(executeBranchWidth, "Branch execute width"),
- INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+ INIT_PARAM(wbWidth, "Writeback width"),
+ INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
@@ -235,6 +234,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
+ INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+ INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
+
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
INIT_PARAM(localCtrBits, "Bits per counter"),
@@ -353,12 +355,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->commitToIEWDelay = commitToIEWDelay;
params->renameToIEWDelay = renameToIEWDelay;
params->issueToExecuteDelay = issueToExecuteDelay;
+ params->dispatchWidth = dispatchWidth;
params->issueWidth = issueWidth;
- params->executeWidth = executeWidth;
- params->executeIntWidth = executeIntWidth;
- params->executeFloatWidth = executeFloatWidth;
- params->executeBranchWidth = executeBranchWidth;
- params->executeMemoryWidth = executeMemoryWidth;
+ params->wbWidth = wbWidth;
+ params->wbDepth = wbDepth;
params->fuPool = fuPool;
params->iewToCommitDelay = iewToCommitDelay;
@@ -368,6 +368,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->trapLatency = trapLatency;
params->fetchTrapLatency = fetchTrapLatency;
+ params->backComSize = backComSize;
+ params->forwardComSize = forwardComSize;
+
params->predType = predType;
params->localPredictorSize = localPredictorSize;
params->localCtrBits = localCtrBits;
diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh
index f39fdf6b6..1bf0652cd 100644
--- a/cpu/o3/alpha_cpu_impl.hh
+++ b/cpu/o3/alpha_cpu_impl.hh
@@ -383,7 +383,7 @@ AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc)
}
// Copy the misc regs.
- cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
+ TheISA::copyMiscRegs(xc, this);
// Then finally set the PC and the next PC.
cpu->setPC(xc->readPC(), tid);
diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh
index f0836a9fd..4ab130d02 100644
--- a/cpu/o3/alpha_params.hh
+++ b/cpu/o3/alpha_params.hh
@@ -106,12 +106,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params
unsigned commitToIEWDelay;
unsigned renameToIEWDelay;
unsigned issueToExecuteDelay;
+ unsigned dispatchWidth;
unsigned issueWidth;
- unsigned executeWidth;
- unsigned executeIntWidth;
- unsigned executeFloatWidth;
- unsigned executeBranchWidth;
- unsigned executeMemoryWidth;
+ unsigned wbWidth;
+ unsigned wbDepth;
FUPool *fuPool;
//
@@ -125,6 +123,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params
Tick fetchTrapLatency;
//
+ // Timebuffer sizes
+ //
+ unsigned backComSize;
+ unsigned forwardComSize;
+
+ //
// Branch predictor (BP, BTB, RAS)
//
std::string predType;
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh
index d93822394..b153effc4 100644
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -160,10 +160,6 @@ class DefaultCommit
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
- void setFetchStage(Fetch *fetch_stage);
-
- Fetch *fetchStage;
-
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
@@ -367,11 +363,6 @@ class DefaultCommit
*/
unsigned renameWidth;
- /** IEW width, in instructions. Used so ROB knows how many
- * instructions to get from the IEW instruction queue.
- */
- unsigned iewWidth;
-
/** Commit width, in instructions. */
unsigned commitWidth;
@@ -392,10 +383,6 @@ class DefaultCommit
*/
Tick trapLatency;
- Tick fetchTrapLatency;
-
- Tick fetchFaultTick;
-
/** The commit PC of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh
index 798f30294..364e685c2 100644
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -71,12 +71,10 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
renameToROBDelay(params->renameToROBDelay),
fetchToCommitDelay(params->commitToFetchDelay),
renameWidth(params->renameWidth),
- iewWidth(params->executeWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads),
switchedOut(false),
- trapLatency(params->trapLatency),
- fetchTrapLatency(params->fetchTrapLatency)
+ trapLatency(params->trapLatency)
{
_status = Active;
_nextStatus = Inactive;
@@ -114,10 +112,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
changedROBNumEntries[i] = false;
trapSquash[i] = false;
xcSquash[i] = false;
+ PC[i] = nextPC[i] = 0;
}
-
- fetchFaultTick = 0;
- fetchTrapWait = 0;
}
template <class Impl>
@@ -240,7 +236,6 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
cpu->activateStage(FullCPU::CommitIdx);
trapLatency = cpu->cycles(trapLatency);
- fetchTrapLatency = cpu->cycles(fetchTrapLatency);
}
template <class Impl>
@@ -299,13 +294,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
template <class Impl>
void
-DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
-{
- fetchStage = fetch_stage;
-}
-
-template <class Impl>
-void
DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
{
iewStage = iew_stage;
@@ -431,7 +419,7 @@ DefaultCommit<Impl>::setNextStatus()
}
}
- assert(squashes == squashCounter);
+ squashCounter = squashes;
// If commit is currently squashing, then it will have activity for the
// next cycle. Set its next status as active.
@@ -536,8 +524,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
commitStatus[tid] = ROBSquashing;
cpu->activityThisCycle();
-
- ++squashCounter;
}
template <class Impl>
@@ -555,8 +541,6 @@ DefaultCommit<Impl>::squashFromXC(unsigned tid)
cpu->activityThisCycle();
xcSquash[tid] = false;
-
- ++squashCounter;
}
template <class Impl>
@@ -571,6 +555,9 @@ DefaultCommit<Impl>::tick()
return;
}
+ if ((*activeThreads).size() <=0)
+ return;
+
list<unsigned>::iterator threads = (*activeThreads).begin();
// Check if any of the threads are done squashing. Change the
@@ -582,10 +569,12 @@ DefaultCommit<Impl>::tick()
if (rob->isDoneSquashing(tid)) {
commitStatus[tid] = Running;
- --squashCounter;
} else {
DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
- "insts this cycle.\n", tid);
+ " insts this cycle.\n", tid);
+ rob->doSquash(tid);
+ toIEW->commitInfo[tid].robSquashing = true;
+ wroteToTimeBuffer = true;
}
}
}
@@ -691,29 +680,7 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
-/*
- if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
- // Record the fault. Wait until it's empty in the ROB.
- // Then handle the trap. Ignore it if there's already a
- // trap pending as fetch will be redirected.
- fetchFault = fromFetch->fetchFault;
- fetchFaultTick = curTick + fetchTrapLatency;
- commitStatus[0] = FetchTrapPending;
- DPRINTF(Commit, "Fault from fetch recorded. Will trap if the "
- "ROB empties without squashing the fault.\n");
- fetchTrapWait = 0;
- }
- // Fetch may tell commit to clear the trap if it's been squashed.
- if (fromFetch->clearFetchFault) {
- DPRINTF(Commit, "Received clear fetch fault signal\n");
- fetchTrapWait = 0;
- if (commitStatus[0] == FetchTrapPending) {
- DPRINTF(Commit, "Clearing fault from fetch\n");
- commitStatus[0] = Running;
- }
- }
-*/
// Not sure which one takes priority. I think if we have
// both, that's a bad sign.
if (trapSquash[tid] == true) {
@@ -741,8 +708,6 @@ DefaultCommit<Impl>::commit()
commitStatus[tid] = ROBSquashing;
- ++squashCounter;
-
// If we want to include the squashing instruction in the squash,
// then use one older sequence number.
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
@@ -944,7 +909,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// and committed this instruction.
thread[tid]->funcExeInst--;
- head_inst->reachedCommit = true;
+ head_inst->setAtCommit();
if (head_inst->isNonSpeculative() ||
head_inst->isStoreConditional() ||
@@ -1060,7 +1025,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
-
+// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
#else // !FULL_SYSTEM
panic("fault (%d) detected @ PC %08p", inst_fault,
@@ -1083,6 +1048,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
+ if (head_inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
// Finally clear the head ROB entry.
rob->retireHead(tid);
diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc
index 8d72bdc41..f1571e61b 100644
--- a/cpu/o3/cpu.cc
+++ b/cpu/o3/cpu.cc
@@ -108,12 +108,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
// For now just have these time buffers be pretty big.
// @todo: Make these time buffer sizes parameters or derived
// from latencies
- timeBuffer(5, 5),
- fetchQueue(5, 5),
- decodeQueue(5, 5),
- renameQueue(5, 5),
- iewQueue(5, 5),
- activityRec(NumStages, 10, params->activity),
+ timeBuffer(params->backComSize, params->forwardComSize),
+ fetchQueue(params->backComSize, params->forwardComSize),
+ decodeQueue(params->backComSize, params->forwardComSize),
+ renameQueue(params->backComSize, params->forwardComSize),
+ iewQueue(params->backComSize, params->forwardComSize),
+ activityRec(NumStages,
+ params->backComSize + params->forwardComSize,
+ params->activity),
globalSeqNum(1),
@@ -180,7 +182,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
commit.setIEWQueue(&iewQueue);
commit.setRenameQueue(&renameQueue);
- commit.setFetchStage(&fetch);
commit.setIEWStage(&iew);
rename.setIEWStage(&iew);
rename.setCommitStage(&commit);
@@ -709,7 +710,7 @@ void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
// Flush out any old data from the time buffers.
- for (int i = 0; i < 10; ++i) {
+ for (int i = 0; i < timeBuffer.getSize(); ++i) {
timeBuffer.advance();
fetchQueue.advance();
decodeQueue.advance();
@@ -758,6 +759,46 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick);
}
+/*
+template <class Impl>
+void
+FullO3CPU<Impl>::serialize(std::ostream &os)
+{
+ BaseCPU::serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ nameOut(os, csprintf("%s.xc.%i", name(), i));
+ temp.copyXC(thread[i]->getXC());
+ temp.serialize(os);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+ BaseCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ temp.copyXC(thread[i]->getXC());
+ temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+ thread[i]->getXC()->copyArchRegs(temp.getXC());
+ }
+}
+*/
template <class Impl>
uint64_t
FullO3CPU<Impl>::readIntReg(int reg_idx)
@@ -866,7 +907,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegSingle(phys_reg, val);
}
@@ -875,7 +917,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegDouble(phys_reg, val);
}
@@ -884,7 +927,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegInt(phys_reg, val);
}
diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh
index f4b19bfb3..ef5c9ae53 100644
--- a/cpu/o3/cpu.hh
+++ b/cpu/o3/cpu.hh
@@ -63,6 +63,12 @@ class BaseFullCPU : public BaseCPU
void regStats();
+ /** Sets this CPU's ID. */
+ void setCpuId(int id) { cpu_id = id; }
+
+ /** Reads this CPU's ID. */
+ int readCpuId() { return cpu_id; }
+
protected:
int cpu_id;
};
diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh
index 0b686375e..e1af4d423 100644
--- a/cpu/o3/decode_impl.hh
+++ b/cpu/o3/decode_impl.hh
@@ -278,7 +278,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].squash = true;
- toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
+ toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
@@ -294,7 +294,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid &&
fromFetch->insts[i]->seqNum > inst->seqNum) {
- fromFetch->insts[i]->squashed = true;
+ fromFetch->insts[i]->setSquashed();
}
}
@@ -343,7 +343,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid) {
- fromFetch->insts[i]->squashed = true;
+ fromFetch->insts[i]->setSquashed();
squash_count++;
}
}
@@ -721,9 +721,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
- inst->setNextPC(inst->branchTarget());
- if (inst->mispredicted()) {
+ if (inst->branchTarget() != inst->readPredTarg()) {
++decodeBranchMispred;
// Might want to set some sort of boolean and just do
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh
index 92a87ab54..0bde56ce9 100644
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -358,6 +358,12 @@ class DefaultFetch
/** The cache line being fetched. */
uint8_t *cacheData[Impl::MaxThreads];
+ /** The PC of the cacheline that has been loaded. */
+ Addr cacheDataPC[Impl::MaxThreads];
+
+ /** Whether or not the cache data is valid. */
+ bool cacheDataValid[Impl::MaxThreads];
+
/** Size of instructions. */
int instSize;
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index a309bd49a..cc09c4a41 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -138,6 +138,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
stalls[tid].decode = 0;
stalls[tid].rename = 0;
@@ -334,6 +336,7 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Wake up the CPU (if it went to sleep and was waiting on this completion
// event).
cpu->wakeCPU();
+ cacheDataValid[tid] = true;
DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
tid);
@@ -466,7 +469,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (interruptPending && flags == 0 || switchedOut) {
+ if (interruptPending && flags == 0) {
// Hold off fetch from getting new instructions while an interrupt
// is pending.
return false;
@@ -475,6 +478,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Align the fetch PC so it's at the start of a cache block.
fetch_PC = icacheBlockAlignPC(fetch_PC);
+ // If we've already got the block, no need to try to fetch it again.
+ if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+ return true;
+ }
+
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
memReq[tid] = new MemReq();
@@ -525,6 +533,9 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
MemAccessResult result = icacheInterface->access(memReq[tid]);
+ cacheDataPC[tid] = fetch_PC;
+ cacheDataValid[tid] = false;
+
fetchedCacheLines++;
// If the cache missed, then schedule an event to wake
@@ -1002,8 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
- warn("%lli: Quiesce instruction encountered, halting fetch!",
- curTick);
+// warn("%lli: Quiesce instruction encountered, halting fetch!",
+// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1067,7 +1078,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
- warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
#endif // FULL_SYSTEM
diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh
index eda6a6bc0..d21c573fe 100644
--- a/cpu/o3/iew.hh
+++ b/cpu/o3/iew.hh
@@ -224,6 +224,47 @@ class DefaultIEW
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
+ void incrWb(InstSeqNum &sn)
+ {
+ if (++wbOutstanding == wbMax)
+ ableToIssue = false;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding <= wbMax);
+#ifdef DEBUG
+ wbList.insert(sn);
+#endif
+ }
+
+ void decrWb(InstSeqNum &sn)
+ {
+ if (wbOutstanding-- == wbMax)
+ ableToIssue = true;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding >= 0);
+#ifdef DEBUG
+ assert(wbList.find(sn) != wbList.end());
+ wbList.erase(sn);
+#endif
+ }
+
+#ifdef DEBUG
+ std::set<InstSeqNum> wbList;
+
+ void dumpWb()
+ {
+ std::set<InstSeqNum>::iterator wb_it = wbList.begin();
+ while (wb_it != wbList.end()) {
+ cprintf("[sn:%lli]\n",
+ (*wb_it));
+ wb_it++;
+ }
+ }
+#endif
+
+ bool canIssue() { return ableToIssue; }
+
+ bool ableToIssue;
+
private:
/** Sends commit proper information for a squash due to a branch
* mispredict.
@@ -281,6 +322,9 @@ class DefaultIEW
/** Processes inputs and changes state accordingly. */
void checkSignalsAndUpdate(unsigned tid);
+ /** Removes instructions from rename from a thread's instruction list. */
+ void emptyRenameInsts(unsigned tid);
+
/** Sorts instructions coming from rename into lists separated by thread. */
void sortInsts();
@@ -401,20 +445,12 @@ class DefaultIEW
*/
unsigned issueToExecuteDelay;
- /** Width of issue's read path, in instructions. The read path is both
- * the skid buffer and the rename instruction queue.
- * Note to self: is this really different than issueWidth?
- */
- unsigned issueReadWidth;
+ /** Width of dispatch, in instructions. */
+ unsigned dispatchWidth;
/** Width of issue, in instructions. */
unsigned issueWidth;
- /** Width of execute, in instructions. Might make more sense to break
- * down into FP vs int.
- */
- unsigned executeWidth;
-
/** Index into queue of instructions being written back. */
unsigned wbNumInst;
@@ -425,6 +461,17 @@ class DefaultIEW
*/
unsigned wbCycle;
+ /** Number of instructions in flight that will writeback. */
+ int wbOutstanding;
+
+ /** Writeback width. */
+ unsigned wbWidth;
+
+ /** Writeback width * writeback depth, where writeback depth is
+ * the number of cycles of writing back instructions that can be
+ * buffered. */
+ unsigned wbMax;
+
/** Number of active threads. */
unsigned numThreads;
@@ -459,14 +506,6 @@ class DefaultIEW
Stats::Scalar<> iewIQFullEvents;
/** Stat for number of times the LSQ becomes full. */
Stats::Scalar<> iewLSQFullEvents;
- /** Stat for total number of executed instructions. */
- Stats::Scalar<> iewExecutedInsts;
- /** Stat for total number of executed load instructions. */
- Stats::Vector<> iewExecLoadInsts;
- /** Stat for total number of executed store instructions. */
-// Stats::Scalar<> iewExecStoreInsts;
- /** Stat for total number of squashed instructions skipped at execute. */
- Stats::Scalar<> iewExecSquashedInsts;
/** Stat for total number of memory ordering violation events. */
Stats::Scalar<> memOrderViolationEvents;
/** Stat for total number of incorrect predicted taken branches. */
@@ -476,28 +515,27 @@ class DefaultIEW
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
+ /** Stat for total number of executed instructions. */
+ Stats::Scalar<> iewExecutedInsts;
+ /** Stat for total number of executed load instructions. */
+ Stats::Vector<> iewExecLoadInsts;
+ /** Stat for total number of executed store instructions. */
+// Stats::Scalar<> iewExecStoreInsts;
+ /** Stat for total number of squashed instructions skipped at execute. */
+ Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */
- Stats::Vector<> exeSwp;
+ Stats::Vector<> iewExecutedSwp;
/** Number of executed nops. */
- Stats::Vector<> exeNop;
+ Stats::Vector<> iewExecutedNop;
/** Number of executed meomory references. */
- Stats::Vector<> exeRefs;
+ Stats::Vector<> iewExecutedRefs;
/** Number of executed branches. */
- Stats::Vector<> exeBranches;
-
-// Stats::Vector<> issued_ops;
-/*
- Stats::Vector<> stat_fu_busy;
- Stats::Vector2d<> stat_fuBusy;
- Stats::Vector<> dist_unissued;
- Stats::Vector2d<> stat_issued_inst_type;
-*/
- /** Number of instructions issued per cycle. */
- Stats::Formula issueRate;
+ Stats::Vector<> iewExecutedBranches;
/** Number of executed store instructions. */
Stats::Formula iewExecStoreInsts;
-// Stats::Formula issue_op_rate;
-// Stats::Formula fu_busy_rate;
+ /** Number of instructions executed per cycle. */
+ Stats::Formula iewExecRate;
+
/** Number of instructions sent to commit. */
Stats::Vector<> iewInstsToCommit;
/** Number of instructions that writeback. */
@@ -510,7 +548,6 @@ class DefaultIEW
* to resource contention.
*/
Stats::Vector<> wbPenalized;
-
/** Number of instructions per cycle written back. */
Stats::Formula wbRate;
/** Average number of woken instructions per writeback. */
diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh
index 3ed20cb75..102be4f8d 100644
--- a/cpu/o3/iew_impl.hh
+++ b/cpu/o3/iew_impl.hh
@@ -56,9 +56,11 @@ DefaultIEW<Impl>::LdWritebackEvent::process()
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
if (iewStage->isSwitchedOut()) {
+ iewStage->decrWb(inst->seqNum);
inst = NULL;
return;
} else if (inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
iewStage->wakeCPU();
inst = NULL;
return;
@@ -93,16 +95,17 @@ DefaultIEW<Impl>::LdWritebackEvent::description()
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
: // @todo: Make this into a parameter.
- issueToExecQueue(5, 5),
+ issueToExecQueue(params->backComSize, params->forwardComSize),
instQueue(params),
ldstQueue(params),
fuPool(params->fuPool),
commitToIEWDelay(params->commitToIEWDelay),
renameToIEWDelay(params->renameToIEWDelay),
issueToExecuteDelay(params->issueToExecuteDelay),
- issueReadWidth(params->issueWidth),
+ dispatchWidth(params->dispatchWidth),
issueWidth(params->issueWidth),
- executeWidth(params->executeWidth),
+ wbOutstanding(0),
+ wbWidth(params->wbWidth),
numThreads(params->numberOfThreads),
switchedOut(false)
{
@@ -125,8 +128,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
fetchRedirect[i] = false;
}
+ wbMax = wbWidth * params->wbDepth;
+
updateLSQNextCycle = false;
+ ableToIssue = true;
+
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
}
@@ -144,6 +151,7 @@ DefaultIEW<Impl>::regStats()
using namespace Stats;
instQueue.regStats();
+ ldstQueue.regStats();
iewIdleCycles
.name(name() + ".iewIdleCycles")
@@ -189,20 +197,6 @@ DefaultIEW<Impl>::regStats()
.name(name() + ".iewLSQFullEvents")
.desc("Number of times the LSQ has become full, causing a stall");
- iewExecutedInsts
- .name(name() + ".iewExecutedInsts")
- .desc("Number of executed instructions");
-
- iewExecLoadInsts
- .init(cpu->number_of_threads)
- .name(name() + ".iewExecLoadInsts")
- .desc("Number of load instructions executed")
- .flags(total);
-
- iewExecSquashedInsts
- .name(name() + ".iewExecSquashedInsts")
- .desc("Number of squashed instructions skipped in execute");
-
memOrderViolationEvents
.name(name() + ".memOrderViolationEvents")
.desc("Number of memory order violations");
@@ -221,47 +215,49 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
- exeSwp
+ iewExecutedInsts
+ .name(name() + ".iewExecutedInsts")
+ .desc("Number of executed instructions");
+
+ iewExecLoadInsts
+ .init(cpu->number_of_threads)
+ .name(name() + ".iewExecLoadInsts")
+ .desc("Number of load instructions executed")
+ .flags(total);
+
+ iewExecSquashedInsts
+ .name(name() + ".iewExecSquashedInsts")
+ .desc("Number of squashed instructions skipped in execute");
+
+ iewExecutedSwp
.init(cpu->number_of_threads)
.name(name() + ".EXEC:swp")
.desc("number of swp insts executed")
- .flags(total)
- ;
+ .flags(total);
- exeNop
+ iewExecutedNop
.init(cpu->number_of_threads)
.name(name() + ".EXEC:nop")
.desc("number of nop insts executed")
- .flags(total)
- ;
+ .flags(total);
- exeRefs
+ iewExecutedRefs
.init(cpu->number_of_threads)
.name(name() + ".EXEC:refs")
.desc("number of memory reference insts executed")
- .flags(total)
- ;
+ .flags(total);
- exeBranches
+ iewExecutedBranches
.init(cpu->number_of_threads)
.name(name() + ".EXEC:branches")
.desc("Number of branches executed")
- .flags(total)
- ;
-
- issueRate
- .name(name() + ".EXEC:rate")
- .desc("Inst execution rate")
- .flags(total)
- ;
- issueRate = iewExecutedInsts / cpu->numCycles;
+ .flags(total);
iewExecStoreInsts
.name(name() + ".EXEC:stores")
.desc("Number of stores executed")
- .flags(total)
- ;
- iewExecStoreInsts = exeRefs - iewExecLoadInsts;
+ .flags(total);
+ iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts;
/*
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
@@ -277,58 +273,50 @@ DefaultIEW<Impl>::regStats()
.init(cpu->number_of_threads)
.name(name() + ".WB:sent")
.desc("cumulative count of insts sent to commit")
- .flags(total)
- ;
+ .flags(total);
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
- .flags(total)
- ;
+ .flags(total);
producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
- .flags(total)
- ;
+ .flags(total);
consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
- .flags(total)
- ;
+ .flags(total);
wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
- .flags(total)
- ;
+ .flags(total);
wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
- .flags(total)
- ;
+ .flags(total);
wbPenalizedRate = wbPenalized / writebackCount;
wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
- .flags(total)
- ;
+ .flags(total);
wbFanout = producerInst / consumerInst;
wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
- .flags(total)
- ;
+ .flags(total);
wbRate = writebackCount / cpu->numCycles;
}
@@ -481,8 +469,7 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
- // @todo: Fix hardcoded number
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
}
@@ -515,16 +502,7 @@ DefaultIEW<Impl>::squash(unsigned tid)
skidBuffer[tid].pop();
}
- while (!insts[tid].empty()) {
- if (insts[tid].front()->isLoad() ||
- insts[tid].front()->isStore() ) {
- toRename->iewInfo[tid].dispatchedToLSQ++;
- }
-
- toRename->iewInfo[tid].dispatched++;
-
- insts[tid].pop();
- }
+ emptyRenameInsts(tid);
}
template<class Impl>
@@ -650,14 +628,16 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
// free slot.
while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
++wbNumInst;
- if (wbNumInst == issueWidth) {
+ if (wbNumInst == wbWidth) {
++wbCycle;
wbNumInst = 0;
}
- assert(wbCycle < 5);
+ assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
+ DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
+ wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@@ -670,7 +650,7 @@ DefaultIEW<Impl>::validInstsFromRename()
unsigned inst_count = 0;
for (int i=0; i<fromRename->size; i++) {
- if (!fromRename->insts[i]->squashed)
+ if (!fromRename->insts[i]->isSquashed())
inst_count++;
}
@@ -858,10 +838,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid)
}
if (fromCommit->commitInfo[tid].robSquashing) {
- DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n");
+ DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid);
dispatchStatus[tid] = Squashing;
+ emptyRenameInsts(tid);
+ wroteToTimeBuffer = true;
return;
}
@@ -912,6 +894,22 @@ DefaultIEW<Impl>::sortInsts()
template <class Impl>
void
+DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
+{
+ while (!insts[tid].empty()) {
+ if (insts[tid].front()->isLoad() ||
+ insts[tid].front()->isStore() ) {
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ }
+
+ toRename->iewInfo[tid].dispatched++;
+
+ insts[tid].pop();
+ }
+}
+
+template <class Impl>
+void
DefaultIEW<Impl>::wakeCPU()
{
cpu->wakeCPU();
@@ -1010,7 +1008,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
// Loop through the instructions, putting them in the instruction
// queue.
for ( ; dis_num_inst < insts_to_add &&
- dis_num_inst < issueReadWidth;
+ dis_num_inst < dispatchWidth;
++dis_num_inst)
{
inst = insts_to_dispatch.front();
@@ -1149,7 +1147,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
instQueue.recordProducer(inst);
- exeNop[tid]++;
+ iewExecutedNop[tid]++;
add_to_iq = false;
} else if (inst->isExecuted()) {
@@ -1263,6 +1261,7 @@ DefaultIEW<Impl>::executeInsts()
++iewExecSquashedInsts;
+ decrWb(inst->seqNum);
continue;
}
@@ -1399,8 +1398,8 @@ DefaultIEW<Impl>::writebackInsts()
DynInstPtr inst = toCommit->insts[inst_num];
int tid = inst->threadNumber;
- DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
- inst->readPC());
+ DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
iewInstsToCommit[tid]++;
@@ -1425,6 +1424,8 @@ DefaultIEW<Impl>::writebackInsts()
}
writebackCount[tid]++;
}
+
+ decrWb(inst->seqNum);
}
}
@@ -1561,7 +1562,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
- exeSwp[thread_number]++;
+ iewExecutedSwp[thread_number]++;
else
iewExecutedInsts++;
#else
@@ -1572,13 +1573,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
// Control operations
//
if (inst->isControl())
- exeBranches[thread_number]++;
+ iewExecutedBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
- exeRefs[thread_number]++;
+ iewExecutedRefs[thread_number]++;
if (inst->isLoad()) {
iewExecLoadInsts[thread_number]++;
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh
index 4802cbaf4..80cd71f0d 100644
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -490,8 +490,6 @@ class InstructionQueue
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
-// Stats::Formula issue_stores;
-// Stats::Formula issue_op_rate;
/** Number of times the FU was busy. */
Stats::Vector<> fuBusy;
/** Number of times the FU was busy per instruction issued. */
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh
index d677a259c..72cb0d708 100644
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -288,22 +288,7 @@ InstructionQueue<Impl>::regStats()
.flags(total)
;
issueRate = iqInstsIssued / cpu->numCycles;
-/*
- issue_stores
- .name(name() + ".ISSUE:stores")
- .desc("Number of stores issued")
- .flags(total)
- ;
- issue_stores = exe_refs - exe_loads;
-*/
-/*
- issue_op_rate
- .name(name() + ".ISSUE:op_rate")
- .desc("Operation issue rate")
- .flags(total)
- ;
- issue_op_rate = issued_ops / numCycles;
-*/
+
statFuBusy
.init(Num_OpClasses)
.name(name() + ".ISSUE:fu_full")
@@ -700,6 +685,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int total_issued = 0;
while (total_issued < totalWidth &&
+ iewStage->canIssue() &&
order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
@@ -790,13 +776,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// complete.
++freeEntries;
count[tid]--;
- issuing_inst->removeInIQ();
+ issuing_inst->clearInIQ();
} else {
memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
statIssuedInstType[tid][op_class]++;
+ iewStage->incrWb(issuing_inst->seqNum);
} else {
statFuBusy[op_class]++;
fuBusy[tid]++;
@@ -1096,7 +1083,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
// inst will flow through the rest of the pipeline.
squashed_inst->setIssued();
squashed_inst->setCanCommit();
- squashed_inst->removeInIQ();
+ squashed_inst->clearInIQ();
//Update Thread IQ Count
count[squashed_inst->threadNumber]--;
diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh
index b321d4590..c67225bc0 100644
--- a/cpu/o3/lsq.hh
+++ b/cpu/o3/lsq.hh
@@ -62,6 +62,9 @@ class LSQ {
/** Returns the name of the LSQ. */
std::string name() const;
+ /** Registers the statistics for each LSQ Unit. */
+ void regStats();
+
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Sets the CPU pointer. */
diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh
index a6ad27522..a8a55af1a 100644
--- a/cpu/o3/lsq_impl.hh
+++ b/cpu/o3/lsq_impl.hh
@@ -106,6 +106,16 @@ LSQ<Impl>::name() const
template<class Impl>
void
+LSQ<Impl>::regStats()
+{
+ //Initialize LSQs
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].regStats();
+ }
+}
+
+template<class Impl>
+void
LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
{
activeThreads = at_ptr;
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh
index a6afff743..fe174a97d 100644
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -101,6 +101,9 @@ class LSQUnit {
/** Returns the name of the LSQ unit. */
std::string name() const;
+ /** Registers statistics. */
+ void regStats();
+
/** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
@@ -153,9 +156,6 @@ class LSQUnit {
/** Writes back stores. */
void writebackStores();
- // @todo: Include stats in the LSQ unit.
- //void regStats();
-
/** Clears all the entries in the LQ. */
void clearLQ();
@@ -369,25 +369,34 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
-/*
- // total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
+ /** Total number of loads forwaded from LSQ stores. */
+ Stats::Scalar<> lsqForwLoads;
+
+ /** Total number of loads ignored due to invalid addresses. */
+ Stats::Scalar<> invAddrLoads;
+
+ /** Total number of squashed loads. */
+ Stats::Scalar<> lsqSquashedLoads;
- // total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
+ /** Total number of responses from the memory system that are
+ * ignored due to the instruction already being squashed. */
+ Stats::Scalar<> lsqIgnoredResponses;
- // total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
+ /** Total number of squashed stores. */
+ Stats::Scalar<> lsqSquashedStores;
- // total non-speculative bogus addresses seen (debug var)
- Counter sim_invalid_addrs;
- Stats::Vector<> fu_busy; //cumulative fu busy
+ /** Total number of software prefetches ignored due to invalid addresses. */
+ Stats::Scalar<> invAddrSwpfs;
- // ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
+ /** Ready loads blocked due to partial store-forwarding. */
+ Stats::Scalar<> lsqBlockedLoads;
+
+ /** Number of loads that were rescheduled. */
+ Stats::Scalar<> lsqRescheduledLoads;
+
+ /** Number of times the LSQ is blocked due to the cache. */
+ Stats::Scalar<> lsqCacheBlocked;
- Stats::Scalar<> lsqInversion;
-*/
public:
/** Executes the load at the given index. */
template <class T>
@@ -441,8 +450,9 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
if (req->flags & UNCACHEABLE &&
- (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
+ (load_idx != loadHead || !loadQueue[load_idx]->isAtCommit())) {
iewStage->rescheduleMemInst(loadQueue[load_idx]);
+ ++lsqRescheduledLoads;
return TheISA::genMachineCheckFault();
}
@@ -552,6 +562,8 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(loadQueue[load_idx]);
+ iewStage->decrWb(loadQueue[load_idx]->seqNum);
+ ++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
// complete.
@@ -559,6 +571,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
"Store idx %i to load addr %#x\n",
store_idx, req->vaddr);
+ ++lsqBlockedLoads;
return NoFault;
}
}
@@ -579,6 +592,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// if we have a cache, do cache access too
if (fault == NoFault && dcacheInterface) {
if (dcacheInterface->isBlocked()) {
+ ++lsqCacheBlocked;
+
+ iewStage->decrWb(inst->seqNum);
+
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
return NoFault;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh
index 4ee8bb234..5cc3078f8 100644
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -126,6 +126,47 @@ LSQUnit<Impl>::name() const
template<class Impl>
void
+LSQUnit<Impl>::regStats()
+{
+ lsqForwLoads
+ .name(name() + ".forwLoads")
+ .desc("Number of loads that had data forwarded from stores");
+
+ invAddrLoads
+ .name(name() + ".invAddrLoads")
+ .desc("Number of loads ignored due to an invalid address");
+
+ lsqSquashedLoads
+ .name(name() + ".squashedLoads")
+ .desc("Number of loads squashed");
+
+ lsqIgnoredResponses
+ .name(name() + ".ignoredResponses")
+ .desc("Number of memory responses ignored because the instruction is squashed");
+
+ lsqSquashedStores
+ .name(name() + ".squashedStores")
+ .desc("Number of stores squashed");
+
+ invAddrSwpfs
+ .name(name() + ".invAddrSwpfs")
+ .desc("Number of software prefetches ignored due to an invalid address");
+
+ lsqBlockedLoads
+ .name(name() + ".blockedLoads")
+ .desc("Number of blocked loads due to partial load-store forwarding");
+
+ lsqRescheduledLoads
+ .name(name() + ".rescheduledLoads")
+ .desc("Number of loads that were rescheduled");
+
+ lsqCacheBlocked
+ .name(name() + ".cacheBlocked")
+ .desc("Number of times an access to memory failed due to the cache being blocked");
+}
+
+template<class Impl>
+void
LSQUnit<Impl>::clearLQ()
{
loadQueue.clear();
@@ -548,6 +589,7 @@ LSQUnit<Impl>::writebackStores()
if (dcacheInterface && dcacheInterface->isBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
+ ++lsqCacheBlocked;
break;
}
@@ -705,7 +747,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
// Clear the smart pointer to make sure it is decremented.
- loadQueue[load_idx]->squashed = true;
+ loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
--loads;
@@ -748,7 +790,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
// Clear the smart pointer to make sure it is decremented.
- storeQueue[store_idx].inst->squashed = true;
+ storeQueue[store_idx].inst->setSquashed();
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
@@ -765,6 +807,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
storeTail = store_idx;
decrStIdx(store_idx);
+ ++lsqSquashedStores;
}
}
diff --git a/cpu/o3/mem_dep_unit.cc b/cpu/o3/mem_dep_unit.cc
index ccdd1a515..b0f91d44f 100644
--- a/cpu/o3/mem_dep_unit.cc
+++ b/cpu/o3/mem_dep_unit.cc
@@ -35,6 +35,7 @@
// AlphaSimpleImpl.
template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
+#ifdef DEBUG
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
@@ -44,3 +45,4 @@ MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
+#endif
diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh
index 595e9293f..bfe694bd8 100644
--- a/cpu/o3/mem_dep_unit_impl.hh
+++ b/cpu/o3/mem_dep_unit_impl.hh
@@ -59,7 +59,9 @@ MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
}
}
+#ifdef DEBUG
assert(MemDepEntry::memdep_count == 0);
+#endif
}
template <class MemDepPred, class Impl>
@@ -141,7 +143,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
instList[tid].push_back(inst);
@@ -227,7 +231,9 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
// Insert the MemDepEntry into the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
// Add the instruction to the list.
instList[tid].push_back(inst);
@@ -275,7 +281,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
// Add the instruction to the instruction list.
instList[tid].push_back(barr_inst);
@@ -375,7 +383,9 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
+#ifdef DEBUG
MemDepEntry::memdep_erase++;
+#endif
}
template <class MemDepPred, class Impl>
@@ -470,7 +480,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
+#ifdef DEBUG
MemDepEntry::memdep_erase++;
+#endif
instList[tid].erase(squash_it--);
}
@@ -551,5 +563,7 @@ MemDepUnit<MemDepPred, Impl>::dumpLists()
cprintf("Memory dependence hash size: %i\n", memDepHash.size());
+#ifdef DEBUG
cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
+#endif
}
diff --git a/cpu/o3/regfile.hh b/cpu/o3/regfile.hh
index ed1238d36..76c43d3a1 100644
--- a/cpu/o3/regfile.hh
+++ b/cpu/o3/regfile.hh
@@ -223,10 +223,10 @@ class PhysRegFile
public:
/** (signed) integer register file. */
- std::vector<IntReg> intRegFile;
+ IntReg *intRegFile;
/** Floating point register file. */
- std::vector<FloatReg> floatRegFile;
+ FloatReg *floatRegFile;
/** Miscellaneous register file. */
MiscRegFile miscRegs[Impl::MaxThreads];
@@ -256,11 +256,15 @@ PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
: numPhysicalIntRegs(_numPhysicalIntRegs),
numPhysicalFloatRegs(_numPhysicalFloatRegs)
{
- intRegFile.resize(numPhysicalIntRegs);
- floatRegFile.resize(numPhysicalFloatRegs);
+ intRegFile = new IntReg[numPhysicalIntRegs];
+ floatRegFile = new FloatReg[numPhysicalFloatRegs];
- //memset(intRegFile, 0, sizeof(*intRegFile));
- //memset(floatRegFile, 0, sizeof(*floatRegFile));
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ miscRegs[i].clear();
+ }
+
+ memset(intRegFile, 0, sizeof(*intRegFile));
+ memset(floatRegFile, 0, sizeof(*floatRegFile));
}
#endif
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index 829c99584..93f5b3504 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -348,7 +348,7 @@ DefaultRename<Impl>::squash(unsigned tid)
for (int i=0; i<fromDecode->size; i++) {
if (fromDecode->insts[i]->threadNumber == tid) {
- fromDecode->insts[i]->squashed = true;
+ fromDecode->insts[i]->setSquashed();
wroteToTimeBuffer = true;
squashCount++;
}
@@ -1029,7 +1029,7 @@ DefaultRename<Impl>::validInsts()
unsigned inst_count = 0;
for (int i=0; i<fromDecode->size; i++) {
- if (!fromDecode->insts[i]->squashed)
+ if (!fromDecode->insts[i]->isSquashed())
inst_count++;
}
diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh
index bdbdde32f..2043e0b34 100644
--- a/cpu/o3/rob.hh
+++ b/cpu/o3/rob.hh
@@ -305,7 +305,7 @@ class ROB
private:
/** The sequence number of the squashed instruction. */
- InstSeqNum squashedSeqNum;
+ InstSeqNum squashedSeqNum[Impl::MaxThreads];
/** Is the ROB done squashing. */
bool doneSquashing[Impl::MaxThreads];
diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh
index 25e0c80fd..62c4d9cf7 100644
--- a/cpu/o3/rob_impl.hh
+++ b/cpu/o3/rob_impl.hh
@@ -38,10 +38,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
: numEntries(_numEntries),
squashWidth(_squashWidth),
numInstsInROB(0),
- squashedSeqNum(0),
numThreads(_numThreads)
{
for (int tid=0; tid < numThreads; tid++) {
+ squashedSeqNum[tid] = 0;
doneSquashing[tid] = true;
threadEntries[tid] = 0;
}
@@ -274,7 +274,7 @@ ROB<Impl>::retireHead(unsigned tid)
--numInstsInROB;
--threadEntries[tid];
- head_inst->removeInROB();
+ head_inst->clearInROB();
head_inst->setCommitted();
instList[tid].erase(head_it);
@@ -349,11 +349,11 @@ void
ROB<Impl>::doSquash(unsigned tid)
{
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
- tid, squashedSeqNum);
+ tid, squashedSeqNum[tid]);
assert(squashIt[tid] != instList[tid].end());
- if ((*squashIt[tid])->seqNum < squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -368,7 +368,7 @@ ROB<Impl>::doSquash(unsigned tid)
for (int numSquashed = 0;
numSquashed < squashWidth &&
squashIt[tid] != instList[tid].end() &&
- (*squashIt[tid])->seqNum > squashedSeqNum;
+ (*squashIt[tid])->seqNum > squashedSeqNum[tid];
++numSquashed)
{
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
@@ -405,7 +405,7 @@ ROB<Impl>::doSquash(unsigned tid)
// Check if ROB is done squashing.
- if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -517,7 +517,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doneSquashing[tid] = false;
- squashedSeqNum = squash_num;
+ squashedSeqNum[tid] = squash_num;
if (!instList[tid].empty()) {
InstIt tail_thread = instList[tid].end();