summaryrefslogtreecommitdiff
path: root/src/cpu
diff options
context:
space:
mode:
authorAli Saidi <Ali.Saidi@ARM.com>2012-06-05 01:23:09 -0400
committerAli Saidi <Ali.Saidi@ARM.com>2012-06-05 01:23:09 -0400
commit6df196b71e058b2c827e1027416155ac8ec8cf9f (patch)
treee2adf25e5628078f8e7c7d89c97130c8962e0ab0 /src/cpu
parentaec7a4411683d8b10684f8f70093bcbbc2de8b55 (diff)
downloadgem5-6df196b71e058b2c827e1027416155ac8ec8cf9f.tar.xz
O3: Clean up the O3 structures and try to pack them a bit better.
DynInst is extremely large; the hope is that this re-organization will put the most-used members close to each other.
Diffstat (limited to 'src/cpu')
-rw-r--r--src/cpu/base_dyn_inst.hh409
-rw-r--r--src/cpu/base_dyn_inst_impl.hh22
-rw-r--r--src/cpu/o3/bpred_unit.hh17
-rw-r--r--src/cpu/o3/comm.hh23
-rw-r--r--src/cpu/o3/commit_impl.hh10
-rw-r--r--src/cpu/o3/decode_impl.hh2
-rw-r--r--src/cpu/o3/dyn_inst.hh23
-rw-r--r--src/cpu/o3/dyn_inst_impl.hh3
-rw-r--r--src/cpu/o3/iew_impl.hh4
-rw-r--r--src/cpu/o3/inst_queue_impl.hh20
-rw-r--r--src/cpu/o3/lsq_unit.hh35
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh18
-rw-r--r--src/cpu/o3/rename_impl.hh4
13 files changed, 288 insertions, 302 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index a9cb60070..20278bd30 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -98,92 +98,16 @@ class BaseDynInst : public RefCounted
MaxInstDestRegs = TheISA::MaxInstDestRegs /// Max dest regs
};
- /** The StaticInst used by this BaseDynInst. */
- StaticInstPtr staticInst;
- StaticInstPtr macroop;
-
- ////////////////////////////////////////////
- //
- // INSTRUCTION EXECUTION
- //
- ////////////////////////////////////////////
- /** InstRecord that tracks this instructions. */
- Trace::InstRecord *traceData;
-
- void demapPage(Addr vaddr, uint64_t asn)
- {
- cpu->demapPage(vaddr, asn);
- }
- void demapInstPage(Addr vaddr, uint64_t asn)
- {
- cpu->demapPage(vaddr, asn);
- }
- void demapDataPage(Addr vaddr, uint64_t asn)
- {
- cpu->demapPage(vaddr, asn);
- }
-
- Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags);
-
- Fault writeMem(uint8_t *data, unsigned size,
- Addr addr, unsigned flags, uint64_t *res);
-
- /** Splits a request in two if it crosses a dcache block. */
- void splitRequest(RequestPtr req, RequestPtr &sreqLow,
- RequestPtr &sreqHigh);
-
- /** Initiate a DTB address translation. */
- void initiateTranslation(RequestPtr req, RequestPtr sreqLow,
- RequestPtr sreqHigh, uint64_t *res,
- BaseTLB::Mode mode);
-
- /** Finish a DTB address translation. */
- void finishTranslation(WholeTranslationState *state);
-
- /** True if the DTB address translation has started. */
- bool translationStarted;
-
- /** True if the DTB address translation has completed. */
- bool translationCompleted;
-
- /** True if this address was found to match a previous load and they issued
- * out of order. If that happend, then it's only a problem if an incoming
- * snoop invalidate modifies the line, in which case we need to squash.
- * If nothing modified the line the order doesn't matter.
- */
- bool possibleLoadViolation;
-
- /** True if the address hit a external snoop while sitting in the LSQ.
- * If this is true and a older instruction sees it, this instruction must
- * reexecute
- */
- bool hitExternalSnoop;
-
- /**
- * Returns true if the DTB address translation is being delayed due to a hw
- * page table walk.
- */
- bool isTranslationDelayed() const
- {
- return (translationStarted && !translationCompleted);
- }
-
- /**
- * Saved memory requests (needed when the DTB address translation is
- * delayed due to a hw page table walk).
- */
- RequestPtr savedReq;
- RequestPtr savedSreqLow;
- RequestPtr savedSreqHigh;
-
- // Need a copy of main request pointer to verify on writes.
- RequestPtr reqToVerify;
-
- /** @todo: Consider making this private. */
- public:
- /** The sequence number of the instruction. */
- InstSeqNum seqNum;
+ union Result {
+ uint64_t integer;
+ double dbl;
+ void set(uint64_t i) { integer = i; }
+ void set(double d) { dbl = d; }
+ void get(uint64_t& i) { i = integer; }
+ void get(double& d) { d = dbl; }
+ };
+ protected:
enum Status {
IqEntry, /// Instruction is in the IQ
RobEntry, /// Instruction is in the ROB
@@ -210,17 +134,31 @@ class BaseDynInst : public RefCounted
NumStatus
};
- /** The status of this BaseDynInst. Several bits can be set. */
- std::bitset<NumStatus> status;
-
- /** The thread this instruction is from. */
- ThreadID threadNumber;
+ enum Flags {
+ TranslationStarted,
+ TranslationCompleted,
+ PossibleLoadViolation,
+ HitExternalSnoop,
+ EffAddrValid,
+ RecordResult,
+ Predicate,
+ PredTaken,
+ /** Whether or not the effective address calculation is completed.
+ * @todo: Consider if this is necessary or not.
+ */
+ EACalcDone,
+ IsUncacheable,
+ ReqMade,
+ MemOpDone,
+ MaxFlags
+ };
- /** data address space ID, for loads & stores. */
- short asid;
+ public:
+ /** The sequence number of the instruction. */
+ InstSeqNum seqNum;
- /** How many source registers are ready. */
- unsigned readyRegs;
+ /** The StaticInst used by this BaseDynInst. */
+ StaticInstPtr staticInst;
/** Pointer to the Impl's CPU object. */
ImplCPU *cpu;
@@ -231,17 +169,50 @@ class BaseDynInst : public RefCounted
/** The kind of fault this instruction has generated. */
Fault fault;
- /** Pointer to the data for the memory access. */
- uint8_t *memData;
+ /** InstRecord that tracks this instruction. */
+ Trace::InstRecord *traceData;
- /** The effective virtual address (lds & stores only). */
- Addr effAddr;
+ protected:
+ /** The result of the instruction; assumes an instruction can have many
+ * destination registers.
+ */
+ std::queue<Result> instResult;
- /** The size of the request */
- Addr effSize;
+ /** PC state for this instruction. */
+ TheISA::PCState pc;
- /** Is the effective virtual address valid. */
- bool effAddrValid;
+ /* An amalgamation of a lot of boolean values into one */
+ std::bitset<MaxFlags> instFlags;
+
+ /** The status of this BaseDynInst. Several bits can be set. */
+ std::bitset<NumStatus> status;
+
+ /** Whether or not the source register is ready.
+ * @todo: Not sure this should be here vs the derived class.
+ */
+ std::bitset<MaxInstSrcRegs> _readySrcRegIdx;
+
+ public:
+ /** The thread this instruction is from. */
+ ThreadID threadNumber;
+
+ /** Iterator pointing to this BaseDynInst in the list of all insts. */
+ ListIt instListIt;
+
+ ////////////////////// Branch Data ///////////////
+ /** Predicted PC state after this instruction. */
+ TheISA::PCState predPC;
+
+ /** The Macroop if one exists */
+ StaticInstPtr macroop;
+
+ /** How many source registers are ready. */
+ uint8_t readyRegs;
+
+ public:
+ /////////////////////// Load Store Data //////////////////////
+ /** The effective virtual address (lds & stores only). */
+ Addr effAddr;
/** The effective physical address. */
Addr physEffAddr;
@@ -249,46 +220,40 @@ class BaseDynInst : public RefCounted
/** The memory request flags (from translation). */
unsigned memReqFlags;
- union Result {
- uint64_t integer;
- double dbl;
- void set(uint64_t i) { integer = i; }
- void set(double d) { dbl = d; }
- void get(uint64_t& i) { i = integer; }
- void get(double& d) { d = dbl; }
- };
-
- /** The result of the instruction; assumes an instruction can have many
- * destination registers.
- */
- std::queue<Result> instResult;
+ /** data address space ID, for loads & stores. */
+ short asid;
- /** Records changes to result? */
- bool recordResult;
+ /** The size of the request */
+ uint8_t effSize;
- /** Did this instruction execute, or is it predicated false */
- bool predicate;
+ /** Pointer to the data for the memory access. */
+ uint8_t *memData;
- protected:
- /** PC state for this instruction. */
- TheISA::PCState pc;
+ /** Load queue index. */
+ int16_t lqIdx;
- /** Predicted PC state after this instruction. */
- TheISA::PCState predPC;
+ /** Store queue index. */
+ int16_t sqIdx;
- /** If this is a branch that was predicted taken */
- bool predTaken;
- public:
+ /////////////////////// TLB Miss //////////////////////
+ /**
+ * Saved memory requests (needed when the DTB address translation is
+ * delayed due to a hw page table walk).
+ */
+ RequestPtr savedReq;
+ RequestPtr savedSreqLow;
+ RequestPtr savedSreqHigh;
-#ifdef DEBUG
- void dumpSNList();
-#endif
+ /////////////////////// Checker //////////////////////
+ // Need a copy of main request pointer to verify on writes.
+ RequestPtr reqToVerify;
- /** Whether or not the source register is ready.
- * @todo: Not sure this should be here vs the derived class.
+ private:
+ /** Instruction effective address.
+ * @todo: Consider if this is necessary or not.
*/
- bool _readySrcRegIdx[MaxInstSrcRegs];
+ Addr instEffAddr;
protected:
/** Flattened register index of the destination registers of this
@@ -296,11 +261,6 @@ class BaseDynInst : public RefCounted
*/
TheISA::RegIndex _flatDestRegIdx[TheISA::MaxInstDestRegs];
- /** Flattened register index of the source registers of this
- * instruction.
- */
- TheISA::RegIndex _flatSrcRegIdx[TheISA::MaxInstSrcRegs];
-
/** Physical register index of the destination registers of this
* instruction.
*/
@@ -316,7 +276,91 @@ class BaseDynInst : public RefCounted
*/
PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs];
+
+ public:
+ /** Records changes to result? */
+ void recordResult(bool f) { instFlags[RecordResult] = f; }
+
+ /** Is the effective virtual address valid. */
+ bool effAddrValid() const { return instFlags[EffAddrValid]; }
+
+ /** Whether or not the memory operation is done. */
+ bool memOpDone() const { return instFlags[MemOpDone]; }
+ void memOpDone(bool f) { instFlags[MemOpDone] = f; }
+
+
+ ////////////////////////////////////////////
+ //
+ // INSTRUCTION EXECUTION
+ //
+ ////////////////////////////////////////////
+
+ void demapPage(Addr vaddr, uint64_t asn)
+ {
+ cpu->demapPage(vaddr, asn);
+ }
+ void demapInstPage(Addr vaddr, uint64_t asn)
+ {
+ cpu->demapPage(vaddr, asn);
+ }
+ void demapDataPage(Addr vaddr, uint64_t asn)
+ {
+ cpu->demapPage(vaddr, asn);
+ }
+
+ Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags);
+
+ Fault writeMem(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res);
+
+ /** Splits a request in two if it crosses a dcache block. */
+ void splitRequest(RequestPtr req, RequestPtr &sreqLow,
+ RequestPtr &sreqHigh);
+
+ /** Initiate a DTB address translation. */
+ void initiateTranslation(RequestPtr req, RequestPtr sreqLow,
+ RequestPtr sreqHigh, uint64_t *res,
+ BaseTLB::Mode mode);
+
+ /** Finish a DTB address translation. */
+ void finishTranslation(WholeTranslationState *state);
+
+ /** True if the DTB address translation has started. */
+ bool translationStarted() const { return instFlags[TranslationStarted]; }
+ void translationStarted(bool f) { instFlags[TranslationStarted] = f; }
+
+ /** True if the DTB address translation has completed. */
+ bool translationCompleted() const { return instFlags[TranslationCompleted]; }
+ void translationCompleted(bool f) { instFlags[TranslationCompleted] = f; }
+
+ /** True if this address was found to match a previous load and they issued
+ * out of order. If that happened, then it's only a problem if an incoming
+ * snoop invalidate modifies the line, in which case we need to squash.
+ * If nothing modified the line the order doesn't matter.
+ */
+ bool possibleLoadViolation() const { return instFlags[PossibleLoadViolation]; }
+ void possibleLoadViolation(bool f) { instFlags[PossibleLoadViolation] = f; }
+
+ /** True if the address hit an external snoop while sitting in the LSQ.
+ * If this is true and an older instruction sees it, this instruction must
+ * reexecute.
+ */
+ bool hitExternalSnoop() const { return instFlags[HitExternalSnoop]; }
+ void hitExternalSnoop(bool f) { instFlags[HitExternalSnoop] = f; }
+
+ /**
+ * Returns true if the DTB address translation is being delayed due to a hw
+ * page table walk.
+ */
+ bool isTranslationDelayed() const
+ {
+ return (translationStarted() && !translationCompleted());
+ }
+
public:
+#ifdef DEBUG
+ void dumpSNList();
+#endif
/** Returns the physical register index of the i'th destination
* register.
@@ -329,6 +373,7 @@ class BaseDynInst : public RefCounted
/** Returns the physical register index of the i'th source register. */
PhysRegIndex renamedSrcRegIdx(int idx) const
{
+ assert(TheISA::MaxInstSrcRegs > idx);
return _srcRegIdx[idx];
}
@@ -340,12 +385,6 @@ class BaseDynInst : public RefCounted
return _flatDestRegIdx[idx];
}
- /** Returns the flattened register index of the i'th source register */
- TheISA::RegIndex flattenedSrcRegIdx(int idx) const
- {
- return _flatSrcRegIdx[idx];
- }
-
/** Returns the physical register index of the previous physical register
* that remapped to the same logical register index.
*/
@@ -374,13 +413,6 @@ class BaseDynInst : public RefCounted
_srcRegIdx[idx] = renamed_src;
}
- /** Flattens a source architectural register index into a logical index.
- */
- void flattenSrcReg(int idx, TheISA::RegIndex flattened_src)
- {
- _flatSrcRegIdx[idx] = flattened_src;
- }
-
/** Flattens a destination architectural register index into a logical
* index.
*/
@@ -457,12 +489,12 @@ class BaseDynInst : public RefCounted
/** Returns whether the instruction was predicted taken or not. */
bool readPredTaken()
{
- return predTaken;
+ return instFlags[PredTaken];
}
void setPredTaken(bool predicted_taken)
{
- predTaken = predicted_taken;
+ instFlags[PredTaken] = predicted_taken;
}
/** Returns whether the instruction mispredicted. */
@@ -588,7 +620,7 @@ class BaseDynInst : public RefCounted
template <class T>
void setResult(T t)
{
- if (recordResult) {
+ if (instFlags[RecordResult]) {
Result instRes;
instRes.set(t);
instResult.push(instRes);
@@ -774,12 +806,12 @@ class BaseDynInst : public RefCounted
bool readPredicate()
{
- return predicate;
+ return instFlags[Predicate];
}
void setPredicate(bool val)
{
- predicate = val;
+ instFlags[Predicate] = val;
if (traceData) {
traceData->setPredicate(val);
@@ -798,54 +830,24 @@ class BaseDynInst : public RefCounted
/** Returns the thread context. */
ThreadContext *tcBase() { return thread->getTC(); }
- private:
- /** Instruction effective address.
- * @todo: Consider if this is necessary or not.
- */
- Addr instEffAddr;
-
- /** Whether or not the effective address calculation is completed.
- * @todo: Consider if this is necessary or not.
- */
- bool eaCalcDone;
-
- /** Is this instruction's memory access uncacheable. */
- bool isUncacheable;
-
- /** Has this instruction generated a memory request. */
- bool reqMade;
-
public:
/** Sets the effective address. */
- void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
+ void setEA(Addr &ea) { instEffAddr = ea; instFlags[EACalcDone] = true; }
/** Returns the effective address. */
const Addr &getEA() const { return instEffAddr; }
/** Returns whether or not the eff. addr. calculation has been completed. */
- bool doneEACalc() { return eaCalcDone; }
+ bool doneEACalc() { return instFlags[EACalcDone]; }
/** Returns whether or not the eff. addr. source registers are ready. */
bool eaSrcsReady();
- /** Whether or not the memory operation is done. */
- bool memOpDone;
-
/** Is this instruction's memory access uncacheable. */
- bool uncacheable() { return isUncacheable; }
+ bool uncacheable() { return instFlags[IsUncacheable]; }
/** Has this instruction generated a memory request. */
- bool hasRequest() { return reqMade; }
-
- public:
- /** Load queue index. */
- int16_t lqIdx;
-
- /** Store queue index. */
- int16_t sqIdx;
-
- /** Iterator pointing to this BaseDynInst in the list of all insts. */
- ListIt instListIt;
+ bool hasRequest() { return instFlags[ReqMade]; }
/** Returns iterator to this instruction in the list of all insts. */
ListIt &getInstListIt() { return instListIt; }
@@ -868,12 +870,12 @@ Fault
BaseDynInst<Impl>::readMem(Addr addr, uint8_t *data,
unsigned size, unsigned flags)
{
- reqMade = true;
+ instFlags[ReqMade] = true;
Request *req = NULL;
Request *sreqLow = NULL;
Request *sreqHigh = NULL;
- if (reqMade && translationStarted) {
+ if (instFlags[ReqMade] && translationStarted()) {
req = savedReq;
sreqLow = savedSreqLow;
sreqHigh = savedSreqHigh;
@@ -888,11 +890,11 @@ BaseDynInst<Impl>::readMem(Addr addr, uint8_t *data,
initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
}
- if (translationCompleted) {
+ if (translationCompleted()) {
if (fault == NoFault) {
effAddr = req->getVaddr();
effSize = size;
- effAddrValid = true;
+ instFlags[EffAddrValid] = true;
if (cpu->checker) {
if (reqToVerify != NULL) {
@@ -931,12 +933,12 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size,
traceData->setAddr(addr);
}
- reqMade = true;
+ instFlags[ReqMade] = true;
Request *req = NULL;
Request *sreqLow = NULL;
Request *sreqHigh = NULL;
- if (reqMade && translationStarted) {
+ if (instFlags[ReqMade] && translationStarted()) {
req = savedReq;
sreqLow = savedSreqLow;
sreqHigh = savedSreqHigh;
@@ -951,10 +953,10 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size,
initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
}
- if (fault == NoFault && translationCompleted) {
+ if (fault == NoFault && translationCompleted()) {
effAddr = req->getVaddr();
effSize = size;
- effAddrValid = true;
+ instFlags[EffAddrValid] = true;
if (cpu->checker) {
if (reqToVerify != NULL) {
@@ -991,7 +993,7 @@ BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
RequestPtr sreqHigh, uint64_t *res,
BaseTLB::Mode mode)
{
- translationStarted = true;
+ translationStarted(true);
if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
WholeTranslationState *state =
@@ -1001,7 +1003,7 @@ BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
DataTranslation<BaseDynInstPtr> *trans =
new DataTranslation<BaseDynInstPtr>(this, state);
cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
- if (!translationCompleted) {
+ if (!translationCompleted()) {
// Save memory requests.
savedReq = state->mainReq;
savedSreqLow = state->sreqLow;
@@ -1019,7 +1021,7 @@ BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
- if (!translationCompleted) {
+ if (!translationCompleted()) {
// Save memory requests.
savedReq = state->mainReq;
savedSreqLow = state->sreqLow;
@@ -1034,8 +1036,7 @@ BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
{
fault = state->getFault();
- if (state->isUncacheable())
- isUncacheable = true;
+ instFlags[IsUncacheable] = state->isUncacheable();
if (fault == NoFault) {
physEffAddr = state->getPaddr();
@@ -1051,7 +1052,7 @@ BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
}
delete state;
- translationCompleted = true;
+ translationCompleted(true);
}
#endif // __CPU_BASE_DYN_INST_HH__
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index 05f9b7767..663159b94 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -60,13 +60,12 @@ BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
StaticInstPtr _macroop,
TheISA::PCState _pc, TheISA::PCState _predPC,
InstSeqNum seq_num, ImplCPU *cpu)
- : staticInst(_staticInst), macroop(_macroop), traceData(NULL), cpu(cpu)
+ : staticInst(_staticInst), cpu(cpu), traceData(NULL), macroop(_macroop)
{
seqNum = seq_num;
pc = _pc;
predPC = _predPC;
- predTaken = false;
initVars();
}
@@ -74,7 +73,7 @@ BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
template <class Impl>
BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
StaticInstPtr _macroop)
- : staticInst(_staticInst), macroop(_macroop), traceData(NULL)
+ : staticInst(_staticInst), traceData(NULL), macroop(_macroop)
{
seqNum = 0;
initVars();
@@ -86,25 +85,14 @@ BaseDynInst<Impl>::initVars()
{
memData = NULL;
effAddr = 0;
- effAddrValid = false;
physEffAddr = 0;
-
- translationStarted = false;
- translationCompleted = false;
- possibleLoadViolation = false;
- hitExternalSnoop = false;
-
- isUncacheable = false;
- reqMade = false;
readyRegs = 0;
- recordResult = true;
-
status.reset();
- eaCalcDone = false;
- memOpDone = false;
- predicate = true;
+ instFlags.reset();
+ instFlags[RecordResult] = true;
+ instFlags[Predicate] = true;
lqIdx = -1;
sqIdx = -1;
diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh
index 673472b69..8bfab11a9 100644
--- a/src/cpu/o3/bpred_unit.hh
+++ b/src/cpu/o3/bpred_unit.hh
@@ -206,9 +206,9 @@ class BPredUnit
PredictorHistory(const InstSeqNum &seq_num, Addr instPC,
bool pred_taken, void *bp_history,
ThreadID _tid)
- : seqNum(seq_num), pc(instPC), RASTarget(0), RASIndex(0),
- tid(_tid), predTaken(pred_taken), usedRAS(0),
- wasCall(0), wasReturn(0), validBTB(0), bpHistory(bp_history)
+ : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
+ RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
+ wasCall(0), wasReturn(0), validBTB(0)
{}
bool operator==(const PredictorHistory &entry) const {
@@ -221,6 +221,12 @@ class BPredUnit
/** The PC associated with the sequence number. */
Addr pc;
+ /** Pointer to the history object passed back from the branch
+ * predictor. It is used to update or restore state of the
+ * branch predictor.
+ */
+ void *bpHistory;
+
/** The RAS target (only valid if a return). */
TheISA::PCState RASTarget;
@@ -243,11 +249,6 @@ class BPredUnit
bool wasReturn;
/** Whether or not the instruction had a valid BTB entry. */
bool validBTB;
- /** Pointer to the history object passed back from the branch
- * predictor. It is used to update or restore state of the
- * branch predictor.
- */
- void *bpHistory;
};
typedef std::list<PredictorHistory> History;
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index 053d4f6be..31d252c73 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -96,15 +96,14 @@ struct DefaultIEWDefaultCommit {
int size;
DynInstPtr insts[Impl::MaxWidth];
-
- bool squash[Impl::MaxThreads];
- bool branchMispredict[Impl::MaxThreads];
DynInstPtr mispredictInst[Impl::MaxThreads];
- bool branchTaken[Impl::MaxThreads];
Addr mispredPC[Impl::MaxThreads];
- TheISA::PCState pc[Impl::MaxThreads];
InstSeqNum squashedSeqNum[Impl::MaxThreads];
+ TheISA::PCState pc[Impl::MaxThreads];
+ bool squash[Impl::MaxThreads];
+ bool branchMispredict[Impl::MaxThreads];
+ bool branchTaken[Impl::MaxThreads];
bool includeSquashInst[Impl::MaxThreads];
};
@@ -122,21 +121,17 @@ template<class Impl>
struct TimeBufStruct {
typedef typename Impl::DynInstPtr DynInstPtr;
struct decodeComm {
- bool squash;
- bool predIncorrect;
uint64_t branchAddr;
-
InstSeqNum doneSeqNum;
-
- // @todo: Might want to package this kind of branch stuff into a single
- // struct as it is used pretty frequently.
- bool branchMispredict;
DynInstPtr mispredictInst;
- bool branchTaken;
+ DynInstPtr squashInst;
Addr mispredPC;
TheISA::PCState nextPC;
- DynInstPtr squashInst;
unsigned branchCount;
+ bool squash;
+ bool predIncorrect;
+ bool branchMispredict;
+ bool branchTaken;
};
decodeComm decodeInfo[Impl::MaxThreads];
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 1bf493871..45f5bc02b 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -1244,11 +1244,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->microPC(),
head_inst->seqNum,
head_inst->staticInst->disassemble(head_inst->instAddr()));
- DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", head_inst->decodeTick);
- DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", head_inst->renameTick);
- DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", head_inst->dispatchTick);
- DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", head_inst->issueTick);
- DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", head_inst->completeTick);
+ DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", head_inst->fetchTick + head_inst->decodeTick);
+ DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", head_inst->fetchTick + head_inst->renameTick);
+ DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", head_inst->fetchTick + head_inst->dispatchTick);
+ DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", head_inst->fetchTick + head_inst->issueTick);
+ DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", head_inst->fetchTick + head_inst->completeTick);
DPRINTFR(O3PipeView, "O3PipeView:retire:%llu\n", curTick());
#endif
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index fd8dc834b..315d53155 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -709,7 +709,7 @@ DefaultDecode<Impl>::decodeInsts(ThreadID tid)
--insts_available;
#if TRACING_ON
- inst->decodeTick = curTick();
+ inst->decodeTick = curTick() - inst->fetchTick;
#endif
// Ensure that if it was predicted as a branch, it really is a
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index ed947d92f..8acbf3443 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -107,26 +107,28 @@ class BaseO3DynInst : public BaseDynInst<Impl>
void initVars();
protected:
+ /** Values to be written to the destination misc. registers. */
+ MiscReg _destMiscRegVal[TheISA::MaxMiscDestRegs];
+
/** Indexes of the destination misc. registers. They are needed to defer
* the write accesses to the misc. registers until the commit stage, when
* the instruction is out of its speculative state.
*/
- int _destMiscRegIdx[MaxInstDestRegs];
- /** Values to be written to the destination misc. registers. */
- MiscReg _destMiscRegVal[MaxInstDestRegs];
+ short _destMiscRegIdx[TheISA::MaxMiscDestRegs];
+
/** Number of destination misc. registers. */
- int _numDestMiscRegs;
+ uint8_t _numDestMiscRegs;
- public:
+ public:
#if TRACING_ON
/** Tick records used for the pipeline activity viewer. */
Tick fetchTick;
- Tick decodeTick;
- Tick renameTick;
- Tick dispatchTick;
- Tick issueTick;
- Tick completeTick;
+ uint32_t decodeTick;
+ uint32_t renameTick;
+ uint32_t dispatchTick;
+ uint32_t issueTick;
+ uint32_t completeTick;
#endif
/** Reads a misc. register, including any side-effects the read
@@ -145,6 +147,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
/** Writes to misc. registers are recorded and deferred until the
* commit stage, when updateMiscRegs() is called.
*/
+ assert(_numDestMiscRegs < TheISA::MaxMiscDestRegs);
_destMiscRegIdx[_numDestMiscRegs] = misc_reg;
_destMiscRegVal[_numDestMiscRegs] = val;
_numDestMiscRegs++;
diff --git a/src/cpu/o3/dyn_inst_impl.hh b/src/cpu/o3/dyn_inst_impl.hh
index 2870d40fe..85778aadc 100644
--- a/src/cpu/o3/dyn_inst_impl.hh
+++ b/src/cpu/o3/dyn_inst_impl.hh
@@ -75,9 +75,10 @@ BaseO3DynInst<Impl>::initVars()
for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
- this->_readySrcRegIdx[i] = 0;
}
+ this->_readySrcRegIdx.reset();
+
_numDestMiscRegs = 0;
#if TRACING_ON
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index b306e6e58..60f4604a2 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1152,7 +1152,7 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
++iewDispatchedInsts;
#if TRACING_ON
- inst->dispatchTick = curTick();
+ inst->dispatchTick = curTick() - inst->fetchTick;
#endif
}
@@ -1617,7 +1617,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
iewExecutedInsts++;
#if TRACING_ON
- inst->completeTick = curTick();
+ inst->completeTick = curTick() - inst->fetchTick;
#endif
//
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 2c0779a03..ae5f93c38 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -859,7 +859,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
++total_issued;
#if TRACING_ON
- issuing_inst->issueTick = curTick();
+ issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
#endif
if (!issuing_inst->isMemRef()) {
@@ -1054,8 +1054,8 @@ InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
// Reset DTB translation state
- resched_inst->translationStarted = false;
- resched_inst->translationCompleted = false;
+ resched_inst->translationStarted(false);
+ resched_inst->translationCompleted(false);
resched_inst->clearCanIssue();
memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
@@ -1079,7 +1079,7 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
++freeEntries;
- completed_inst->memOpDone = true;
+ completed_inst->memOpDone(true);
memDepUnit[tid].completed(completed_inst);
count[tid]--;
@@ -1098,7 +1098,7 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
{
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
++it) {
- if ((*it)->translationCompleted || (*it)->isSquashed()) {
+ if ((*it)->translationCompleted() || (*it)->isSquashed()) {
DynInstPtr ret = *it;
deferredMemInsts.erase(it);
return ret;
@@ -1165,7 +1165,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
if (!squashed_inst->isIssued() ||
(squashed_inst->isMemRef() &&
- !squashed_inst->memOpDone)) {
+ !squashed_inst->memOpDone())) {
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n",
tid, squashed_inst->seqNum, squashed_inst->pcState());
@@ -1456,7 +1456,7 @@ InstructionQueue<Impl>::dumpInsts()
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
+ !(*inst_list_it)->memOpDone()) {
// Loads that have not been marked as executed
// still count towards the total instructions.
++valid_num;
@@ -1473,7 +1473,7 @@ InstructionQueue<Impl>::dumpInsts()
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
}
cprintf("\n");
@@ -1498,7 +1498,7 @@ InstructionQueue<Impl>::dumpInsts()
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
+ !(*inst_list_it)->memOpDone()) {
// Loads that have not been marked as executed
// still count towards the total instructions.
++valid_num;
@@ -1515,7 +1515,7 @@ InstructionQueue<Impl>::dumpInsts()
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
}
cprintf("\n");
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index c3bb8f7cd..7093b5fee 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -275,28 +275,28 @@ class LSQUnit {
public:
/** Default constructor. */
LSQSenderState()
- : noWB(false), isSplit(false), pktToSend(false), outstanding(1),
- mainPkt(NULL), pendingPacket(NULL)
- { }
+ : mainPkt(NULL), pendingPacket(NULL), outstanding(1),
+ noWB(false), isSplit(false), pktToSend(false)
+ { }
/** Instruction who initiated the access to memory. */
DynInstPtr inst;
+ /** The main packet from a split load, used during writeback. */
+ PacketPtr mainPkt;
+ /** A second packet from a split store that needs sending. */
+ PacketPtr pendingPacket;
+ /** The LQ/SQ index of the instruction. */
+ uint8_t idx;
+ /** Number of outstanding packets to complete. */
+ uint8_t outstanding;
/** Whether or not it is a load. */
bool isLoad;
- /** The LQ/SQ index of the instruction. */
- int idx;
/** Whether or not the instruction will need to writeback. */
bool noWB;
/** Whether or not this access is split in two. */
bool isSplit;
/** Whether or not there is a packet that needs sending. */
bool pktToSend;
- /** Number of outstanding packets to complete. */
- int outstanding;
- /** The main packet from a split load, used during writeback. */
- PacketPtr mainPkt;
- /** A second packet from a split store that needs sending. */
- PacketPtr pendingPacket;
/** Completes a packet and returns whether the access is finished. */
inline bool complete() { return --outstanding == 0; }
@@ -342,7 +342,8 @@ class LSQUnit {
{
std::memset(data, 0, sizeof(data));
}
-
+ /** The store data. */
+ char data[16];
/** The store instruction. */
DynInstPtr inst;
/** The request for the store. */
@@ -351,9 +352,7 @@ class LSQUnit {
RequestPtr sreqLow;
RequestPtr sreqHigh;
/** The size of the store. */
- int size;
- /** The store data. */
- char data[16];
+ uint8_t size;
/** Whether or not the store is split into two requests. */
bool isSplit;
/** Whether or not the store can writeback. */
@@ -593,9 +592,9 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
// Disable recording the result temporarily. Writing to misc
// regs normally updates the result, but this is not the
// desired behavior when handling store conditionals.
- load_inst->recordResult = false;
+ load_inst->recordResult(false);
TheISA::handleLockedRead(load_inst.get(), req);
- load_inst->recordResult = true;
+ load_inst->recordResult(true);
}
if (req->isMmappedIpr()) {
@@ -651,7 +650,7 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
else if (storeQueue[store_idx].inst->uncacheable())
continue;
- assert(storeQueue[store_idx].inst->effAddrValid);
+ assert(storeQueue[store_idx].inst->effAddrValid());
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 4f82ad9e3..a878b1540 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -465,7 +465,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
- if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
+ if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) {
incrLdIdx(load_idx);
continue;
}
@@ -475,7 +475,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
ld_inst->seqNum, load_addr, invalidate_addr);
if (load_addr == invalidate_addr) {
- if (ld_inst->possibleLoadViolation) {
+ if (ld_inst->possibleLoadViolation()) {
DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum);
@@ -485,7 +485,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
// If a older load checks this and it's true
// then we might have missed the snoop
// in which case we need to invalidate to be sure
- ld_inst->hitExternalSnoop = true;
+ ld_inst->hitExternalSnoop(true);
}
}
incrLdIdx(load_idx);
@@ -507,7 +507,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
*/
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
- if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
+ if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) {
incrLdIdx(load_idx);
continue;
}
@@ -521,7 +521,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
// If this load is to the same block as an external snoop
// invalidate that we've observed then the load needs to be
// squashed as it could have newer data
- if (ld_inst->hitExternalSnoop) {
+ if (ld_inst->hitExternalSnoop()) {
if (!memDepViolator ||
ld_inst->seqNum < memDepViolator->seqNum) {
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
@@ -540,7 +540,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
// Otherwise, mark the load has a possible load violation
// and if we see a snoop before it's commited, we need to squash
- ld_inst->possibleLoadViolation = true;
+ ld_inst->possibleLoadViolation(true);
DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x"
" between instructions [sn:%lli] and [sn:%lli]\n",
inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
@@ -610,7 +610,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
} else if (!loadBlocked()) {
- assert(inst->effAddrValid);
+ assert(inst->effAddrValid());
int load_idx = inst->lqIdx;
incrLdIdx(load_idx);
@@ -857,9 +857,9 @@ LSQUnit<Impl>::writebackStores()
// Disable recording the result temporarily. Writing to
// misc regs normally updates the result, but this is not
// the desired behavior when handling store conditionals.
- inst->recordResult = false;
+ inst->recordResult(false);
bool success = TheISA::handleLockedWrite(inst.get(), req);
- inst->recordResult = true;
+ inst->recordResult(true);
if (!success) {
// Instantly complete this store.
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 678927813..592bc059f 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -692,7 +692,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
++renamed_insts;
#if TRACING_ON
- inst->renameTick = curTick();
+ inst->renameTick = curTick() - inst->fetchTick;
#endif
// Put instruction in rename queue.
@@ -997,8 +997,6 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, ThreadID tid)
panic("Reg index is out of bound: %d.", src_reg);
}
- inst->flattenSrcReg(src_idx, flat_src_reg);
-
// Look up the source registers to get the phys. register they've
// been renamed to, and set the sources to those registers.
PhysRegIndex renamed_reg = renameMap[tid]->lookup(flat_src_reg);