summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kevin Lim <ktlim@umich.edu> 2006-08-24 17:45:04 -0400
committer Kevin Lim <ktlim@umich.edu> 2006-08-24 17:45:04 -0400
commit 4ec5e90c8fa72a4bea34a35d0f0194cefae37f81 (patch)
tree fce1c3b738b5400b33a930674988c92ded9a6122
parent ad2fa1e1c9587e8c2a2b7f3e5a9c592312042eb4 (diff)
download gem5-4ec5e90c8fa72a4bea34a35d0f0194cefae37f81.tar.xz
Ozone updates.
cpu/ozone/front_end.hh:
cpu/ozone/front_end_impl.hh:
cpu/ozone/lw_back_end.hh:
    Support latency for Ozone FE and BE.
cpu/ozone/lw_back_end_impl.hh:
    Support latency for Ozone FE and BE. Also fixes for switching out, profiling.
cpu/ozone/lw_lsq.hh:
cpu/ozone/lw_lsq_impl.hh:
    Fixes for switching out.
cpu/ozone/simple_params.hh:
    Updated parameters.
--HG--
extra : convert_revision : 21d4846a59a2239bfdf8fe92e47fd0972debe4f5
-rw-r--r-- cpu/ozone/front_end.hh 7
-rw-r--r-- cpu/ozone/front_end_impl.hh 38
-rw-r--r-- cpu/ozone/lw_back_end.hh 18
-rw-r--r-- cpu/ozone/lw_back_end_impl.hh 172
-rw-r--r-- cpu/ozone/lw_lsq.hh 17
-rw-r--r-- cpu/ozone/lw_lsq_impl.hh 49
-rw-r--r-- cpu/ozone/simple_params.hh 4
7 files changed, 237 insertions, 68 deletions
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index dd382491f..b677e667c 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -31,6 +31,7 @@
#include <deque>
+#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/rename_table.hh"
@@ -210,15 +211,21 @@ class FrontEnd
void dumpInsts();
private:
+ TimeBuffer<int> numInstsReady;
+
typedef typename std::deque<DynInstPtr> InstBuff;
typedef typename InstBuff::iterator InstBuffIt;
+ InstBuff feBuffer;
+
InstBuff instBuffer;
int instBufferSize;
int maxInstBufferSize;
+ int latency;
+
int width;
int freeRegs;
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index ca9948b7d..09fc2e2f8 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -41,8 +41,10 @@ template <class Impl>
FrontEnd<Impl>::FrontEnd(Params *params)
: branchPred(params),
icacheInterface(params->icacheInterface),
+ numInstsReady(params->frontEndLatency, 0),
instBufferSize(0),
maxInstBufferSize(params->maxInstBufferSize),
+ latency(params->frontEndLatency),
width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs),
@@ -261,6 +263,18 @@ FrontEnd<Impl>::tick()
if (switchedOut)
return;
+ for (int insts_to_queue = numInstsReady[-latency];
+ !instBuffer.empty() && insts_to_queue;
+ --insts_to_queue)
+ {
+ DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
+ instBuffer.front()->seqNum);
+ feBuffer.push_back(instBuffer.front());
+ instBuffer.pop_front();
+ }
+
+ numInstsReady.advance();
+
// @todo: Maybe I want to just have direct communication...
if (fromCommit->doneSeqNum) {
branchPred.update(fromCommit->doneSeqNum, 0);
@@ -349,6 +363,7 @@ FrontEnd<Impl>::tick()
// latency
instBuffer.push_back(inst);
++instBufferSize;
+ numInstsReady[0]++;
++num_inst;
#if FULL_SYSTEM
@@ -570,6 +585,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
instruction->fault = fault;
instruction->setCanIssue();
instBuffer.push_back(instruction);
+ numInstsReady[0]++;
++instBufferSize;
}
@@ -599,6 +615,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
freeRegs+= inst->numDestRegs();
}
+ while (!feBuffer.empty() &&
+ feBuffer.back()->seqNum > squash_num) {
+ DynInstPtr inst = feBuffer.back();
+
+ DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+ inst->seqNum, inst->readPC());
+
+ inst->clearDependents();
+
+ feBuffer.pop_back();
+ --instBufferSize;
+
+ freeRegs+= inst->numDestRegs();
+ }
+
// Copy over rename table from the back end.
renameTable.copyFrom(backEnd->renameTable);
@@ -633,13 +664,13 @@ template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInst()
{
- if (instBufferSize == 0) {
+ if (feBuffer.empty()) {
return NULL;
}
- DynInstPtr inst = instBuffer.front();
+ DynInstPtr inst = feBuffer.front();
- instBuffer.pop_front();
+ feBuffer.pop_front();
--instBufferSize;
@@ -857,6 +888,7 @@ FrontEnd<Impl>::doSwitchOut()
squash(0, 0);
instBuffer.clear();
instBufferSize = 0;
+ feBuffer.clear();
status = Idle;
}
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index 19f2b2b61..4e2f5606c 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -78,7 +78,7 @@ class LWBackEnd
TimeBuffer<IssueToExec> i2e;
typename TimeBuffer<IssueToExec>::wire instsToExecute;
TimeBuffer<ExecToCommit> e2c;
- TimeBuffer<Writeback> numInstsToWB;
+ TimeBuffer<int> numInstsToWB;
TimeBuffer<CommStruct> *comm;
typename TimeBuffer<CommStruct>::wire toIEW;
@@ -157,7 +157,7 @@ class LWBackEnd
Tick lastCommitCycle;
- bool robEmpty() { return instList.empty(); }
+ bool robEmpty() { return numInsts == 0; }
bool isFull() { return numInsts >= numROBEntries; }
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
@@ -212,6 +212,7 @@ class LWBackEnd
}
void instToCommit(DynInstPtr &inst);
+ void readyInstsForCommit();
void switchOut();
void doSwitchOut();
@@ -293,12 +294,13 @@ class LWBackEnd
MemReqPtr memReq;
+ int latency;
+
// General back end width. Used if the more specific isn't given.
int width;
// Dispatch width.
int dispatchWidth;
- int numDispatchEntries;
int dispatchSize;
int waitingInsts;
@@ -323,6 +325,7 @@ class LWBackEnd
int numROBEntries;
int numInsts;
+ bool lsqLimits;
std::set<InstSeqNum> waitingMemOps;
typedef std::set<InstSeqNum>::iterator MemIt;
@@ -333,9 +336,6 @@ class LWBackEnd
InstSeqNum squashSeqNum;
Addr squashNextPC;
- Fault faultFromFetch;
- bool fetchHasFault;
-
bool switchedOut;
bool switchPending;
@@ -359,8 +359,6 @@ class LWBackEnd
std::list<DynInstPtr> replayList;
std::list<DynInstPtr> writeback;
- int latency;
-
int squashLatency;
bool exactFullStall;
@@ -397,9 +395,11 @@ class LWBackEnd
Stats::Scalar<> lsqInversion;
Stats::Vector<> nIssuedDist;
+/*
Stats::VectorDistribution<> issueDelayDist;
Stats::VectorDistribution<> queueResDist;
+*/
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
@@ -447,7 +447,7 @@ class LWBackEnd
Stats::Vector<> ROBCount; // cumulative ROB occupancy
Stats::Formula ROBOccRate;
- Stats::VectorDistribution<> ROBOccDist;
+// Stats::VectorDistribution<> ROBOccDist;
public:
void dumpInsts();
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index 9e1cd28cf..9a6ad4c14 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -151,8 +151,10 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
// iewStage->wakeCPU();
- if (be->isSwitchedOut())
- return;
+ assert(inst->isSquashed() || !be->isSwitchedOut());
+
+// if (be->isSwitchedOut() && inst->isLoad())
+// return;
if (dcacheMiss) {
be->removeDcacheMiss(inst);
@@ -208,14 +210,14 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
- dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
+ dcacheInterface(params->dcacheInterface), latency(params->backEndLatency),
+ width(params->backEndWidth), lsqLimits(params->lsqLimits),
exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
- numDispatchEntries = 32;
maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
@@ -251,6 +253,8 @@ void
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
+ LSQ.regStats();
+
robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
@@ -377,6 +381,7 @@ LWBackEnd<Impl>::regStats()
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
+/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -393,7 +398,7 @@ LWBackEnd<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
-
+*/
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
@@ -555,13 +560,14 @@ LWBackEnd<Impl>::regStats()
.flags(total)
;
ROBOccRate = ROBCount / cpu->numCycles;
-
+/*
ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
.flags(total | cdf)
;
+*/
}
template <class Impl>
@@ -654,18 +660,22 @@ LWBackEnd<Impl>::tick()
{
DPRINTF(BE, "Ticking back end\n");
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
cpu->signalSwitched();
return;
}
+ readyInstsForCommit();
+
+ numInstsToWB.advance();
+
ROBCount[0]+= numInsts;
wbCycle = 0;
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
-
#if FULL_SYSTEM
checkInterrupts();
@@ -740,6 +750,10 @@ LWBackEnd<Impl>::dispatchInsts()
while (numInsts < numROBEntries &&
numWaitingMemOps < maxOutstandingMemOps) {
// Get instruction from front of time buffer
+ if (lsqLimits && LSQ.isFull()) {
+ break;
+ }
+
DynInstPtr inst = frontEnd->getInst();
if (!inst) {
break;
@@ -798,6 +812,7 @@ LWBackEnd<Impl>::dispatchInsts()
inst->setIssued();
inst->setExecuted();
inst->setCanCommit();
+ numInstsToWB[0]++;
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
"exeList.\n",
@@ -987,16 +1002,10 @@ template<class Impl>
void
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
-
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
if (inst->isExecuted()) {
inst->setResultReady();
int dependents = wakeDependents(inst);
@@ -1007,8 +1016,32 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
}
}
+ writeback.push_back(inst);
+
+ numInstsToWB[0]++;
+
writebackCount[0]++;
}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+ for (int i = numInstsToWB[-latency];
+ !writeback.empty() && i;
+ --i)
+ {
+ DynInstPtr inst = writeback.front();
+ writeback.pop_front();
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ }
+ }
+}
+
#if 0
template <class Impl>
void
@@ -1221,6 +1254,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
++freed_regs;
}
+#if FULL_SYSTEM
+ if (thread->profile) {
+// bool usermode =
+// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+// thread->profilePC = usermode ? 1 : inst->readPC();
+ thread->profilePC = inst->readPC();
+ ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
+ inst->staticInst);
+
+ if (node)
+ thread->profileNode = node;
+ }
+#endif
+
if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst);
@@ -1280,9 +1327,9 @@ LWBackEnd<Impl>::commitInsts()
while (!instList.empty() && inst_num < commitWidth) {
if (instList.back()->isSquashed()) {
instList.back()->clearDependents();
+ ROBSquashedInsts[instList.back()->threadNumber]++;
instList.pop_back();
--numInsts;
- ROBSquashedInsts[instList.back()->threadNumber]++;
continue;
}
@@ -1304,10 +1351,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
LSQ.squash(sn);
int freed_regs = 0;
- InstListIt waiting_list_end = waitingList.end();
+ InstListIt insts_end_it = waitingList.end();
InstListIt insts_it = waitingList.begin();
- while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+ while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
++insts_it;
@@ -1333,6 +1380,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
while (!instList.empty() && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
+ panic("Instruction should not be already squashed and on list!");
++insts_it;
continue;
}
@@ -1364,18 +1412,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
--numInsts;
}
- insts_it = waitingList.begin();
- while (!waitingList.empty() && insts_it != waitingList.end()) {
- if ((*insts_it)->seqNum < sn) {
- ++insts_it;
- continue;
- }
- assert((*insts_it)->isSquashed());
-
- waitingList.erase(insts_it++);
- waitingInsts--;
- }
-
while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
"squashed)\n", memBarrier->seqNum);
@@ -1393,6 +1429,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
}
}
+ insts_it = replayList.begin();
+ insts_end_it = replayList.end();
+ while (!replayList.empty() && insts_it != insts_end_it) {
+ if ((*insts_it)->seqNum < sn) {
+ ++insts_it;
+ continue;
+ }
+ assert((*insts_it)->isSquashed());
+
+ replayList.erase(insts_it++);
+ }
+
frontEnd->addFreeRegs(freed_regs);
}
@@ -1465,14 +1513,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
template <class Impl>
void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
- fetchHasFault = true;
-}
-
-template <class Impl>
-void
LWBackEnd<Impl>::switchOut()
{
switchPending = true;
@@ -1489,16 +1529,25 @@ LWBackEnd<Impl>::doSwitchOut()
// yet written back.
assert(robEmpty());
assert(!LSQ.hasStoresToWB());
-
+ writeback.clear();
+ for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+ numInstsToWB.advance();
+
+// squash(0);
+ assert(waitingList.empty());
+ assert(instList.empty());
+ assert(replayList.empty());
+ assert(writeback.empty());
LSQ.switchOut();
-
- squash(0);
}
template <class Impl>
void
LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
{
+ assert(!squashPending);
+ squashSeqNum = 0;
+ squashNextPC = 0;
xcSquash = false;
trapSquash = false;
@@ -1641,6 +1690,45 @@ LWBackEnd<Impl>::dumpInsts()
++num;
}
+ inst_list_it = --(writeback.end());
+
+ cprintf("Writeback list size: %i\n", writeback.size());
+
+ while (inst_list_it != writeback.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
cprintf("Waiting list size: %i\n", waitingList.size());
inst_list_it = --(waitingList.end());
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
index c0bf0b0fe..07fd1aec5 100644
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -110,6 +110,8 @@ class OzoneLWLSQ {
/** Returns the name of the LSQ unit. */
std::string name() const;
+ void regStats();
+
/** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
@@ -203,7 +205,7 @@ class OzoneLWLSQ {
int numLoads() { return loads; }
/** Returns the number of stores in the SQ. */
- int numStores() { return stores; }
+ int numStores() { return stores + storesInFlight; }
/** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); }
@@ -212,7 +214,7 @@ class OzoneLWLSQ {
bool lqFull() { return loads >= (LQEntries - 1); }
/** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
+ bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
@@ -241,7 +243,9 @@ class OzoneLWLSQ {
private:
/** Completes the store at the specified index. */
- void completeStore(int store_idx);
+ void completeStore(DynInstPtr &inst);
+
+ void removeStore(int store_idx);
private:
/** Pointer to the CPU. */
@@ -342,6 +346,10 @@ class OzoneLWLSQ {
int storesToWB;
+ public:
+ int storesInFlight;
+
+ private:
/// @todo Consider moving to a more advanced model with write vs read ports
/** The number of cache ports available each cycle. */
int cachePorts;
@@ -351,6 +359,9 @@ class OzoneLWLSQ {
//list<InstSeqNum> mshrSeqNums;
+ /** Total number of memory ordering violations. */
+ Stats::Scalar<> lsqMemOrderViolation;
+
//Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index f72bbb1cc..c60884fc3 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -57,6 +57,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
// lsqPtr->cpu->wakeCPU();
if (lsqPtr->isSwitchedOut()) {
+ panic("Should not be switched out!");
if (wbEvent)
delete wbEvent;
@@ -68,7 +69,11 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
delete wbEvent;
}
- lsqPtr->completeStore(inst->sqIdx);
+ lsqPtr->completeStore(inst);
+ lsqPtr->removeStore(inst->sqIdx);
+ --(lsqPtr->storesInFlight);
+
+ DPRINTF(OzoneLSQ, "StoresInFlight: %i\n", lsqPtr->storesInFlight);
if (miss)
be->removeDcacheMiss(inst);
}
@@ -82,7 +87,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
template <class Impl>
OzoneLWLSQ<Impl>::OzoneLWLSQ()
- : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+ : loads(0), stores(0), storesToWB(0), storesInFlight(0), stalled(false), isLoadBlocked(false),
loadBlockedHandled(false)
{
}
@@ -123,6 +128,15 @@ OzoneLWLSQ<Impl>::name() const
template<class Impl>
void
+OzoneLWLSQ<Impl>::regStats()
+{
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+}
+
+template<class Impl>
+void
OzoneLWLSQ<Impl>::clearLQ()
{
loadQueue.clear();
@@ -257,7 +271,7 @@ unsigned
OzoneLWLSQ<Impl>::numFreeEntries()
{
unsigned free_lq_entries = LQEntries - loads;
- unsigned free_sq_entries = SQEntries - stores;
+ unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
// Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries.
@@ -397,6 +411,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = (*lq_it);
+ ++lsqMemOrderViolation;
return TheISA::genMachineCheckFault();
}
@@ -483,8 +498,8 @@ OzoneLWLSQ<Impl>::writebackStores()
if ((*sq_it).size == 0 && !(*sq_it).completed) {
sq_it--;
- completeStore(inst->sqIdx);
-
+ removeStore(inst->sqIdx);
+ completeStore(inst);
continue;
}
@@ -540,6 +555,8 @@ OzoneLWLSQ<Impl>::writebackStores()
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
+ DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
+ storesInFlight + 1);
if (dcacheInterface) {
assert(!req->completionEvent);
@@ -601,6 +618,8 @@ OzoneLWLSQ<Impl>::writebackStores()
}
sq_it--;
}
+ ++storesInFlight;
+// removeStore(inst->sqIdx);
} else {
panic("Must HAVE DCACHE!!!!!\n");
}
@@ -617,7 +636,7 @@ void
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+ "(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
LQIt lq_it = loadQueue.begin();
@@ -732,7 +751,7 @@ OzoneLWLSQ<Impl>::dumpInsts()
template <class Impl>
void
-OzoneLWLSQ<Impl>::completeStore(int store_idx)
+OzoneLWLSQ<Impl>::removeStore(int store_idx)
{
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
@@ -742,8 +761,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
(*sq_it).completed = true;
DynInstPtr inst = (*sq_it).inst;
- --storesToWB;
-
if (isStalled() &&
inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
@@ -761,6 +778,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
+{
+ --storesToWB;
--stores;
inst->setCompleted();
@@ -839,9 +863,14 @@ OzoneLWLSQ<Impl>::switchOut()
}
// Clear the queue to free up resources
+ assert(stores == 0);
+ assert(storeQueue.empty());
+ assert(loads == 0);
+ assert(loadQueue.empty());
+ assert(storesInFlight == 0);
storeQueue.clear();
loadQueue.clear();
- loads = stores = storesToWB = 0;
+ loads = stores = storesToWB = storesInFlight = 0;
}
template <class Impl>
diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh
index 7b5c6f67b..d28d040f8 100644
--- a/cpu/ozone/simple_params.hh
+++ b/cpu/ozone/simple_params.hh
@@ -70,10 +70,11 @@ class SimpleParams : public BaseCPU::Params
unsigned cachePorts;
unsigned width;
+ unsigned frontEndLatency;
unsigned frontEndWidth;
+ unsigned backEndLatency;
unsigned backEndWidth;
unsigned backEndSquashLatency;
- unsigned backEndLatency;
unsigned maxInstBufferSize;
unsigned numPhysicalRegs;
unsigned maxOutstandingMemOps;
@@ -149,6 +150,7 @@ class SimpleParams : public BaseCPU::Params
//
unsigned LQEntries;
unsigned SQEntries;
+ bool lsqLimits;
//
// Memory dependence