summaryrefslogtreecommitdiff
path: root/src/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--src/cpu/o3/alpha/cpu.hh41
-rw-r--r--src/cpu/o3/alpha/cpu_impl.hh29
-rw-r--r--src/cpu/o3/alpha/dyn_inst.hh121
-rw-r--r--src/cpu/o3/alpha/dyn_inst_impl.hh9
-rw-r--r--src/cpu/o3/alpha/thread_context.hh9
-rw-r--r--src/cpu/o3/bpred_unit_impl.hh11
-rw-r--r--src/cpu/o3/comm.hh5
-rw-r--r--src/cpu/o3/commit_impl.hh34
-rw-r--r--src/cpu/o3/cpu.cc8
-rw-r--r--src/cpu/o3/cpu.hh24
-rw-r--r--src/cpu/o3/decode_impl.hh25
-rw-r--r--src/cpu/o3/fetch.hh22
-rw-r--r--src/cpu/o3/fetch_impl.hh196
-rw-r--r--src/cpu/o3/iew_impl.hh50
-rw-r--r--src/cpu/o3/lsq_unit.hh6
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh19
-rw-r--r--src/cpu/o3/mips/cpu_impl.hh12
-rwxr-xr-xsrc/cpu/o3/mips/dyn_inst.hh88
-rwxr-xr-xsrc/cpu/o3/mips/dyn_inst_impl.hh12
-rw-r--r--src/cpu/o3/rename.hh8
-rw-r--r--src/cpu/o3/rename_impl.hh95
-rw-r--r--src/cpu/o3/sparc/cpu.hh42
-rw-r--r--src/cpu/o3/sparc/cpu_builder.cc4
-rw-r--r--src/cpu/o3/sparc/cpu_impl.hh41
-rw-r--r--src/cpu/o3/sparc/dyn_inst.hh156
-rw-r--r--src/cpu/o3/sparc/dyn_inst_impl.hh14
-rw-r--r--src/cpu/o3/sparc/thread_context.hh8
-rwxr-xr-xsrc/cpu/o3/thread_context.hh8
-rwxr-xr-xsrc/cpu/o3/thread_context_impl.hh3
29 files changed, 493 insertions, 607 deletions
diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh
index 0078db69f..4a2086296 100644
--- a/src/cpu/o3/alpha/cpu.hh
+++ b/src/cpu/o3/alpha/cpu.hh
@@ -37,12 +37,6 @@
#include "cpu/o3/cpu.hh"
#include "sim/byteswap.hh"
-namespace TheISA
-{
- class ITB;
- class DTB;
-}
-
class EndQuiesceEvent;
namespace Kernel {
class Statistics;
@@ -61,14 +55,6 @@ class TranslatingPort;
template <class Impl>
class AlphaO3CPU : public FullO3CPU<Impl>
{
- protected:
- typedef TheISA::IntReg IntReg;
- typedef TheISA::FloatReg FloatReg;
- typedef TheISA::FloatRegBits FloatRegBits;
- typedef TheISA::MiscReg MiscReg;
- typedef TheISA::RegFile RegFile;
- typedef TheISA::MiscRegFile MiscRegFile;
-
public:
typedef O3ThreadState<Impl> ImplState;
typedef O3ThreadState<Impl> Thread;
@@ -77,13 +63,6 @@ class AlphaO3CPU : public FullO3CPU<Impl>
/** Constructs an AlphaO3CPU with the given parameters. */
AlphaO3CPU(Params *params);
-#if FULL_SYSTEM
- /** ITB pointer. */
- AlphaISA::ITB *itb;
- /** DTB pointer. */
- AlphaISA::DTB *dtb;
-#endif
-
/** Registers statistics. */
void regStats();
@@ -91,19 +70,19 @@ class AlphaO3CPU : public FullO3CPU<Impl>
/** Translates instruction requestion. */
Fault translateInstReq(RequestPtr &req, Thread *thread)
{
- return itb->translate(req, thread->getTC());
+ return this->itb->translate(req, thread->getTC());
}
/** Translates data read request. */
Fault translateDataReadReq(RequestPtr &req, Thread *thread)
{
- return dtb->translate(req, thread->getTC(), false);
+ return this->dtb->translate(req, thread->getTC(), false);
}
/** Translates data write request. */
Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
{
- return dtb->translate(req, thread->getTC(), true);
+ return this->dtb->translate(req, thread->getTC(), true);
}
#else
@@ -127,20 +106,22 @@ class AlphaO3CPU : public FullO3CPU<Impl>
#endif
/** Reads a miscellaneous register. */
- MiscReg readMiscReg(int misc_reg, unsigned tid);
+ TheISA::MiscReg readMiscReg(int misc_reg, unsigned tid);
/** Reads a misc. register, including any side effects the read
* might have as defined by the architecture.
*/
- MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
+ TheISA::MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
/** Sets a miscellaneous register. */
- void setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
+ void setMiscReg(int misc_reg, const TheISA::MiscReg &val,
+ unsigned tid);
/** Sets a misc. register, including any side effects the write
* might have as defined by the architecture.
*/
- void setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
+ void setMiscRegWithEffect(int misc_reg, const TheISA::MiscReg &val,
+ unsigned tid);
/** Initiates a squash of all in-flight instructions for a given
* thread. The source of the squash is an external update of
@@ -175,10 +156,10 @@ class AlphaO3CPU : public FullO3CPU<Impl>
*/
void syscall(int64_t callnum, int tid);
/** Gets a syscall argument. */
- IntReg getSyscallArg(int i, int tid);
+ TheISA::IntReg getSyscallArg(int i, int tid);
/** Used to shift args for indirect syscall. */
- void setSyscallArg(int i, IntReg val, int tid);
+ void setSyscallArg(int i, TheISA::IntReg val, int tid);
/** Sets the return value of a syscall. */
void setSyscallReturn(SyscallReturn return_value, int tid);
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index 98fd0699a..fb0962056 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -55,12 +55,7 @@
#endif
template <class Impl>
-AlphaO3CPU<Impl>::AlphaO3CPU(Params *params)
-#if FULL_SYSTEM
- : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
-#else
- : FullO3CPU<Impl>(params)
-#endif
+AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
{
DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n");
@@ -173,15 +168,16 @@ AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, unsigned tid)
template <class Impl>
void
-AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
+AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const TheISA::MiscReg &val,
+ unsigned tid)
{
this->regFile.setMiscReg(misc_reg, val, tid);
}
template <class Impl>
void
-AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
- unsigned tid)
+AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg,
+ const TheISA::MiscReg &val, unsigned tid)
{
this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
}
@@ -315,7 +311,7 @@ AlphaO3CPU<Impl>::getSyscallArg(int i, int tid)
template <class Impl>
void
-AlphaO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
+AlphaO3CPU<Impl>::setSyscallArg(int i, TheISA::IntReg val, int tid)
{
this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid);
}
@@ -324,17 +320,6 @@ template <class Impl>
void
AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
{
- // check for error condition. Alpha syscall convention is to
- // indicate success/failure in reg a3 (r19) and put the
- // return value itself in the standard return value reg (v0).
- if (return_value.successful()) {
- // no error
- this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid);
- this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid);
- } else {
- // got an error, return details
- this->setArchIntReg(TheISA::SyscallSuccessReg, (IntReg) -1, tid);
- this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid);
- }
+ TheISA::setSyscallReturn(return_value, this->tcBase(tid));
}
#endif
diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh
index 49cc5a201..603a1b52d 100644
--- a/src/cpu/o3/alpha/dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@@ -73,8 +73,9 @@ class AlphaDynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
- AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
- O3CPU *cpu);
+ AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+ Addr Pred_PC, Addr Pred_NPC,
+ InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
AlphaDynInst(StaticInstPtr &_staticInst);
@@ -123,6 +124,44 @@ class AlphaDynInst : public BaseDynInst<Impl>
this->threadNumber);
}
+ /** Reads a miscellaneous register. */
+ TheISA::MiscReg readMiscRegOperand(const StaticInst *si, int idx)
+ {
+ return this->cpu->readMiscReg(
+ si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ this->threadNumber);
+ }
+
+ /** Reads a misc. register, including any side-effects the read
+ * might have as defined by the architecture.
+ */
+ TheISA::MiscReg readMiscRegOperandWithEffect(const StaticInst *si, int idx)
+ {
+ return this->cpu->readMiscRegWithEffect(
+ si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ this->threadNumber);
+ }
+
+ /** Sets a misc. register. */
+ void setMiscRegOperand(const StaticInst * si, int idx, const MiscReg &val)
+ {
+ this->instResult.integer = val;
+ return this->cpu->setMiscReg(
+ si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ val, this->threadNumber);
+ }
+
+ /** Sets a misc. register, including any side-effects the write
+ * might have as defined by the architecture.
+ */
+ void setMiscRegOperandWithEffect(const StaticInst *si, int idx,
+ const MiscReg &val)
+ {
+ return this->cpu->setMiscRegWithEffect(
+ si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ val, this->threadNumber);
+ }
+
#if FULL_SYSTEM
/** Calls hardware return from error interrupt. */
Fault hwrei();
@@ -134,22 +173,6 @@ class AlphaDynInst : public BaseDynInst<Impl>
void syscall(int64_t callnum);
#endif
- private:
- /** Physical register index of the destination registers of this
- * instruction.
- */
- PhysRegIndex _destRegIdx[MaxInstDestRegs];
-
- /** Physical register index of the source registers of this
- * instruction.
- */
- PhysRegIndex _srcRegIdx[MaxInstSrcRegs];
-
- /** Physical register index of the previous producers of the
- * architected destinations.
- */
- PhysRegIndex _prevDestRegIdx[MaxInstDestRegs];
-
public:
// The register accessor methods provide the index of the
@@ -165,28 +188,28 @@ class AlphaDynInst : public BaseDynInst<Impl>
uint64_t readIntRegOperand(const StaticInst *si, int idx)
{
- return this->cpu->readIntReg(_srcRegIdx[idx]);
+ return this->cpu->readIntReg(this->_srcRegIdx[idx]);
}
FloatReg readFloatRegOperand(const StaticInst *si, int idx, int width)
{
- return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+ return this->cpu->readFloatReg(this->_srcRegIdx[idx], width);
}
FloatReg readFloatRegOperand(const StaticInst *si, int idx)
{
- return this->cpu->readFloatReg(_srcRegIdx[idx]);
+ return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
}
FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx,
int width)
{
- return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+ return this->cpu->readFloatRegBits(this->_srcRegIdx[idx], width);
}
FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
{
- return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+ return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
}
/** @todo: Make results into arrays so they can handle multiple dest
@@ -194,79 +217,37 @@ class AlphaDynInst : public BaseDynInst<Impl>
*/
void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
{
- this->cpu->setIntReg(_destRegIdx[idx], val);
+ this->cpu->setIntReg(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
}
void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val,
int width)
{
- this->cpu->setFloatReg(_destRegIdx[idx], val, width);
+ this->cpu->setFloatReg(this->_destRegIdx[idx], val, width);
BaseDynInst<Impl>::setFloatRegOperand(si, idx, val, width);
}
void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
{
- this->cpu->setFloatReg(_destRegIdx[idx], val);
+ this->cpu->setFloatReg(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
}
void setFloatRegOperandBits(const StaticInst *si, int idx,
FloatRegBits val, int width)
{
- this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
+ this->cpu->setFloatRegBits(this->_destRegIdx[idx], val, width);
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
void setFloatRegOperandBits(const StaticInst *si, int idx,
FloatRegBits val)
{
- this->cpu->setFloatRegBits(_destRegIdx[idx], val);
+ this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
- /** Returns the physical register index of the i'th destination
- * register.
- */
- PhysRegIndex renamedDestRegIdx(int idx) const
- {
- return _destRegIdx[idx];
- }
-
- /** Returns the physical register index of the i'th source register. */
- PhysRegIndex renamedSrcRegIdx(int idx) const
- {
- return _srcRegIdx[idx];
- }
-
- /** Returns the physical register index of the previous physical register
- * that remapped to the same logical register index.
- */
- PhysRegIndex prevDestRegIdx(int idx) const
- {
- return _prevDestRegIdx[idx];
- }
-
- /** Renames a destination register to a physical register. Also records
- * the previous physical register that the logical register mapped to.
- */
- void renameDestReg(int idx,
- PhysRegIndex renamed_dest,
- PhysRegIndex previous_rename)
- {
- _destRegIdx[idx] = renamed_dest;
- _prevDestRegIdx[idx] = previous_rename;
- }
-
- /** Renames a source logical register to the physical register which
- * has/will produce that logical register's result.
- * @todo: add in whether or not the source register is ready.
- */
- void renameSrcReg(int idx, PhysRegIndex renamed_src)
- {
- _srcRegIdx[idx] = renamed_src;
- }
-
public:
/** Calculates EA part of a memory instruction. Currently unused,
* though it may be useful in the future if we want to split
diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh
index 6fc548a85..50cdec408 100644
--- a/src/cpu/o3/alpha/dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@@ -31,9 +31,10 @@
#include "cpu/o3/alpha/dyn_inst.hh"
template <class Impl>
-AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+ Addr Pred_PC, Addr Pred_NPC,
InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
{
initVars();
}
@@ -53,11 +54,11 @@ AlphaDynInst<Impl>::initVars()
// as the normal register entries. It will allow the IQ to work
// without any modifications.
for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
- _destRegIdx[i] = this->staticInst->destRegIdx(i);
+ this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
}
for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
- _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+ this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
this->_readySrcRegIdx[i] = 0;
}
}
diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh
index bcecb7087..6d61501ac 100644
--- a/src/cpu/o3/alpha/thread_context.hh
+++ b/src/cpu/o3/alpha/thread_context.hh
@@ -36,12 +36,6 @@ class AlphaTC : public O3ThreadContext<Impl>
{
public:
#if FULL_SYSTEM
- /** Returns a pointer to the ITB. */
- virtual AlphaISA::ITB *getITBPtr() { return this->cpu->itb; }
-
- /** Returns a pointer to the DTB. */
- virtual AlphaISA::DTB *getDTBPtr() { return this->cpu->dtb; }
-
/** Returns pointer to the quiesce event. */
virtual EndQuiesceEvent *getQuiesceEvent()
{
@@ -51,8 +45,7 @@ class AlphaTC : public O3ThreadContext<Impl>
virtual uint64_t readNextNPC()
{
- panic("Alpha has no NextNPC!");
- return 0;
+ return this->readNextPC() + sizeof(TheISA::MachInst);
}
virtual void setNextNPC(uint64_t val)
diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh
index 477c8e4cb..84c50b4da 100644
--- a/src/cpu/o3/bpred_unit_impl.hh
+++ b/src/cpu/o3/bpred_unit_impl.hh
@@ -149,7 +149,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
using TheISA::MachInst;
bool pred_taken = false;
- Addr target;
+ Addr target = PC;
++lookups;
@@ -233,14 +233,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
}
}
- if (pred_taken) {
- // Set the PC and the instruction's predicted target.
- PC = target;
- inst->setPredTarg(target);
- } else {
- PC = PC + sizeof(MachInst);
- inst->setPredTarg(PC);
- }
+ PC = target;
predHist[tid].push_front(predict_record);
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index aa58fc20e..d96919007 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -87,9 +87,10 @@ struct DefaultIEWDefaultCommit {
bool squash[Impl::MaxThreads];
bool branchMispredict[Impl::MaxThreads];
bool branchTaken[Impl::MaxThreads];
- bool condDelaySlotBranch[Impl::MaxThreads];
+ bool squashDelaySlot[Impl::MaxThreads];
uint64_t mispredPC[Impl::MaxThreads];
uint64_t nextPC[Impl::MaxThreads];
+ uint64_t nextNPC[Impl::MaxThreads];
InstSeqNum squashedSeqNum[Impl::MaxThreads];
bool includeSquashInst[Impl::MaxThreads];
@@ -121,6 +122,7 @@ struct TimeBufStruct {
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
+ uint64_t nextNPC;
unsigned branchCount;
};
@@ -160,6 +162,7 @@ struct TimeBufStruct {
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
+ uint64_t nextNPC;
// Represents the instruction that has either been retired or
// squashed. Similar to having a single bus that broadcasts the
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 96f094926..75fe1ed2a 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -519,6 +519,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
toIEW->commitInfo[tid].branchMispredict = false;
toIEW->commitInfo[tid].nextPC = PC[tid];
+ toIEW->commitInfo[tid].nextNPC = nextPC[tid];
}
template <class Impl>
@@ -735,27 +736,15 @@ DefaultCommit<Impl>::commit()
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
#if ISA_HAS_DELAY_SLOT
- InstSeqNum bdelay_done_seq_num;
- bool squash_bdelay_slot;
-
- if (fromIEW->branchMispredict[tid]) {
- if (fromIEW->branchTaken[tid] &&
- fromIEW->condDelaySlotBranch[tid]) {
- DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch"
- "mispredicted as taken. Squashing after previous "
- "inst, [sn:%i]\n",
- tid, squashed_inst);
- bdelay_done_seq_num = squashed_inst;
- squash_bdelay_slot = true;
- } else {
- DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing "
- "after delay slot [sn:%i]\n", tid, squashed_inst+1);
- bdelay_done_seq_num = squashed_inst + 1;
- squash_bdelay_slot = false;
- }
- } else {
- bdelay_done_seq_num = squashed_inst;
- squash_bdelay_slot = true;
+ InstSeqNum bdelay_done_seq_num = squashed_inst;
+ bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid];
+ bool branchMispredict = fromIEW->branchMispredict[tid];
+
+ // Squashing/not squashing the branch delay slot only makes
+ // sense when you're squashing from a branch, ie from a branch
+ // mispredict.
+ if (branchMispredict && !squash_bdelay_slot) {
+ bdelay_done_seq_num++;
}
#endif
@@ -794,6 +783,7 @@ DefaultCommit<Impl>::commit()
fromIEW->branchTaken[tid];
toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
+ toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
@@ -1123,7 +1113,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Update the commit rename map
for (int i = 0; i < head_inst->numDestRegs(); i++) {
- renameMap[tid]->setEntry(head_inst->destRegIdx(i),
+ renameMap[tid]->setEntry(head_inst->flattenedDestRegIdx(i),
head_inst->renamedDestRegIdx(i));
}
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 18cc87c0b..66c75a12d 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -150,6 +150,10 @@ FullO3CPU<Impl>::DeallocateContextEvent::description()
template <class Impl>
FullO3CPU<Impl>::FullO3CPU(Params *params)
: BaseO3CPU(params),
+#if FULL_SYSTEM
+ itb(params->itb),
+ dtb(params->dtb),
+#endif
tickEvent(this),
removeInstsThisCycle(false),
fetch(params),
@@ -657,9 +661,7 @@ FullO3CPU<Impl>::insertThread(unsigned tid)
//Set PC/NPC/NNPC
setPC(src_tc->readPC(), tid);
setNextPC(src_tc->readNextPC(), tid);
-#if ISA_HAS_DELAY_SLOT
setNextNPC(src_tc->readNextNPC(), tid);
-#endif
src_tc->setStatus(ThreadContext::Active);
@@ -698,7 +700,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
// Squash Throughout Pipeline
InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
- fetch.squash(0, squash_seq_num, true, tid);
+ fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
decode.squash(tid);
rename.squash(squash_seq_num, tid);
iew.squash(tid);
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 2bf9cb23b..d217a3e85 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -91,9 +91,6 @@ template <class Impl>
class FullO3CPU : public BaseO3CPU
{
public:
- typedef TheISA::FloatReg FloatReg;
- typedef TheISA::FloatRegBits FloatRegBits;
-
// Typedefs from the Impl here.
typedef typename Impl::CPUPol CPUPolicy;
typedef typename Impl::Params Params;
@@ -114,6 +111,11 @@ class FullO3CPU : public BaseO3CPU
SwitchedOut
};
+#if FULL_SYSTEM
+ TheISA::ITB * itb;
+ TheISA::DTB * dtb;
+#endif
+
/** Overall CPU status. */
Status _status;
@@ -382,23 +384,23 @@ class FullO3CPU : public BaseO3CPU
/** Register accessors. Index refers to the physical register index. */
uint64_t readIntReg(int reg_idx);
- FloatReg readFloatReg(int reg_idx);
+ TheISA::FloatReg readFloatReg(int reg_idx);
- FloatReg readFloatReg(int reg_idx, int width);
+ TheISA::FloatReg readFloatReg(int reg_idx, int width);
- FloatRegBits readFloatRegBits(int reg_idx);
+ TheISA::FloatRegBits readFloatRegBits(int reg_idx);
- FloatRegBits readFloatRegBits(int reg_idx, int width);
+ TheISA::FloatRegBits readFloatRegBits(int reg_idx, int width);
void setIntReg(int reg_idx, uint64_t val);
- void setFloatReg(int reg_idx, FloatReg val);
+ void setFloatReg(int reg_idx, TheISA::FloatReg val);
- void setFloatReg(int reg_idx, FloatReg val, int width);
+ void setFloatReg(int reg_idx, TheISA::FloatReg val, int width);
- void setFloatRegBits(int reg_idx, FloatRegBits val);
+ void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val);
- void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
+ void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val, int width);
uint64_t readArchIntReg(int reg_idx, unsigned tid);
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 26ed40c67..79a0bfdbf 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -282,6 +282,10 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].squash = true;
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
+ ///FIXME There needs to be a way to set the nextPC and nextNPC
+ ///explicitly for ISAs with delay slots.
+ toFetch->decodeInfo[tid].nextNPC =
+ inst->branchTarget() + sizeof(TheISA::MachInst);
#if ISA_HAS_DELAY_SLOT
toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
(inst->readNextPC() + sizeof(TheISA::MachInst));
@@ -743,9 +747,9 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Ensure that if it was predicted as a branch, it really is a
// branch.
- if (inst->predTaken() && !inst->isControl()) {
- DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC,
- inst->nextPC + 4);
+ if (inst->readPredTaken() && !inst->isControl()) {
+ DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",
+ inst->readPredPC(), inst->readNextPC() + 4);
panic("Instruction predicted as a branch!");
@@ -762,26 +766,29 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
- if (inst->branchTarget() != inst->readPredTarg()) {
+ if (inst->branchTarget() != inst->readPredPC()) {
++decodeBranchMispred;
// Might want to set some sort of boolean and just do
// a check at the end
#if !ISA_HAS_DELAY_SLOT
squash(inst, inst->threadNumber);
- inst->setPredTarg(inst->branchTarget());
+ Addr target = inst->branchTarget();
+ inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
break;
#else
// If mispredicted as taken, then ignore delay slot
// instruction... else keep delay slot and squash
// after it is sent to rename
- if (inst->predTaken() && inst->isCondDelaySlot()) {
+ if (inst->readPredTaken() && inst->isCondDelaySlot()) {
DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
"[sn:%i] PC %#x mispredicted as taken.\n", tid,
inst->seqNum, inst->PC);
bdelayDoneSeqNum[tid] = inst->seqNum;
squash(inst, inst->threadNumber);
- inst->setPredTarg(inst->branchTarget());
+ Addr target = inst->branchTarget();
+ inst->setPredTarg(target,
+ target + sizeof(TheISA::MachInst));
break;
} else {
DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
@@ -800,7 +807,9 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
if (squashAfterDelaySlot[tid]) {
assert(!inst->isSquashed());
squash(squashInst[tid], squashInst[tid]->threadNumber);
- squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget());
+ Addr target = squashInst[tid]->branchTarget();
+ squashInst[tid]->setPredTarg(target,
+ target + sizeof(TheISA::MachInst));
assert(!inst->isSquashed());
break;
}
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index e880e14e4..8347ed775 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -239,13 +239,13 @@ class DefaultFetch
bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
/** Squashes a specific thread and resets the PC. */
- inline void doSquash(const Addr &new_PC, unsigned tid);
+ inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode that should be sqaushed.
*/
- void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num,
- unsigned tid);
+ void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num, unsigned tid);
/** Checks if a thread is stalled. */
bool checkStall(unsigned tid) const;
@@ -259,7 +259,8 @@ class DefaultFetch
* remove any instructions that are not in the ROB. The source of this
* squash should be the commit stage.
*/
- void squash(const Addr &new_PC, const InstSeqNum &seq_num,
+ void squash(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num,
bool squash_delay_slot, unsigned tid);
/** Ticks the fetch stage, processing all inputs signals and fetching
@@ -361,19 +362,6 @@ class DefaultFetch
/** Tracks how many instructions has been fetched this cycle. */
int numInst;
- /** Tracks delay slot information for threads in ISAs which use
- * delay slots;
- */
- struct DelaySlotInfo {
- InstSeqNum delaySlotSeqNum;
- InstSeqNum branchSeqNum;
- int numInsts;
- Addr targetAddr;
- bool targetReady;
- };
-
- DelaySlotInfo delaySlotInfo[Impl::MaxThreads];
-
/** Source of possible stalls. */
struct Stalls {
bool decode;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index fe320fa79..90d3868a5 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -319,9 +319,7 @@ DefaultFetch<Impl>::initStage()
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
}
// Size of cache block.
@@ -343,11 +341,6 @@ DefaultFetch<Impl>::initStage()
cacheDataPC[tid] = 0;
cacheDataValid[tid] = false;
- delaySlotInfo[tid].branchSeqNum = -1;
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetAddr = 0;
- delaySlotInfo[tid].targetReady = false;
-
stalls[tid].decode = false;
stalls[tid].rename = false;
stalls[tid].iew = false;
@@ -441,10 +434,8 @@ DefaultFetch<Impl>::takeOverFrom()
nextPC[i] = cpu->readNextPC(i);
#if ISA_HAS_DELAY_SLOT
nextNPC[i] = cpu->readNextNPC(i);
- delaySlotInfo[i].branchSeqNum = -1;
- delaySlotInfo[i].numInsts = 0;
- delaySlotInfo[i].targetAddr = 0;
- delaySlotInfo[i].targetReady = false;
+#else
+ nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
#endif
fetchStatus[i] = Running;
}
@@ -503,54 +494,41 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
bool predict_taken;
if (!inst->isControl()) {
-#if ISA_HAS_DELAY_SLOT
- Addr cur_PC = next_PC;
- next_PC = cur_PC + instSize; //next_NPC;
- next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
- inst->setPredTarg(next_NPC);
-#else
- next_PC = next_PC + instSize;
- inst->setPredTarg(next_PC);
-#endif
+ next_PC = next_NPC;
+ next_NPC = next_NPC + instSize;
+ inst->setPredTarg(next_PC, next_NPC);
+ inst->setPredTaken(false);
return false;
}
int tid = inst->threadNumber;
-#if ISA_HAS_DELAY_SLOT
Addr pred_PC = next_PC;
predict_taken = branchPred.predict(inst, pred_PC, tid);
- if (predict_taken) {
- DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+/* if (predict_taken) {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
+ tid, pred_PC);
} else {
- DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
- }
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
+ }*/
- if (predict_taken) {
- next_PC = next_NPC;
+#if ISA_HAS_DELAY_SLOT
+ next_PC = next_NPC;
+ if (predict_taken)
next_NPC = pred_PC;
-
- // Update delay slot info
- ++delaySlotInfo[tid].numInsts;
- delaySlotInfo[tid].targetAddr = pred_PC;
- DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
- delaySlotInfo[tid].numInsts);
- } else { // !predict_taken
- if (inst->isCondDelaySlot()) {
- next_PC = pred_PC;
- // The delay slot is skipped here if there is on
- // prediction
- } else {
- next_PC = next_NPC;
- // No need to declare a delay slot here since
- // there is no for the pred. target to jump
- }
-
- next_NPC = next_NPC + instSize;
- }
+ else
+ next_NPC += instSize;
#else
- predict_taken = branchPred.predict(inst, next_PC, tid);
+ if (predict_taken)
+ next_PC = pred_PC;
+ else
+ next_PC += instSize;
+ next_NPC = next_PC + instSize;
#endif
+/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+ tid, next_PC, next_NPC);*/
+ inst->setPredTarg(next_PC, next_NPC);
+ inst->setPredTaken(predict_taken);
++fetchedBranches;
@@ -671,14 +649,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
template <class Impl>
inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+ const Addr &new_NPC, unsigned tid)
{
- DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
- tid, new_PC);
+ DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+ tid, new_PC, new_NPC);
PC[tid] = new_PC;
- nextPC[tid] = new_PC + instSize;
- nextNPC[tid] = new_PC + (2 * instSize);
+ nextPC[tid] = new_NPC;
+ nextNPC[tid] = new_NPC + instSize;
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -704,21 +683,13 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
template<class Impl>
void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
const InstSeqNum &seq_num,
unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
- doSquash(new_PC, tid);
-
-#if ISA_HAS_DELAY_SLOT
- if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetAddr = 0;
- delaySlotInfo[tid].targetReady = false;
- }
-#endif
+ doSquash(new_PC, new_NPC, tid);
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
@@ -793,20 +764,15 @@ DefaultFetch<Impl>::updateFetchStatus()
template <class Impl>
void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num,
bool squash_delay_slot, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
- doSquash(new_PC, tid);
+ doSquash(new_PC, new_NPC, tid);
#if ISA_HAS_DELAY_SLOT
- if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetAddr = 0;
- delaySlotInfo[tid].targetReady = false;
- }
-
// Tell the CPU to remove any instructions that are not in the ROB.
cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
#else
@@ -929,6 +895,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
#endif
// In any case, squash.
squash(fromCommit->commitInfo[tid].nextPC,
+ fromCommit->commitInfo[tid].nextNPC,
doneSeqNum,
fromCommit->commitInfo[tid].squashDelaySlot,
tid);
@@ -984,8 +951,12 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
#else
InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
#endif
+ DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
+ fromDecode->decodeInfo[tid].nextPC,
+ fromDecode->decodeInfo[tid].nextNPC);
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+ fromDecode->decodeInfo[tid].nextNPC,
doneSeqNum,
tid);
@@ -1042,6 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// The current PC.
Addr &fetch_PC = PC[tid];
+ Addr &fetch_NPC = nextPC[tid];
+
// Fault code for memory access.
Fault fault = NoFault;
@@ -1098,7 +1071,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
}
Addr next_PC = fetch_PC;
- Addr next_NPC = next_PC + instSize;
+ Addr next_NPC = fetch_NPC;
+
InstSeqNum inst_seq;
MachInst inst;
ExtMachInst ext_inst;
@@ -1117,15 +1091,22 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// ended this fetch block.
bool predicted_branch = false;
- // Need to keep track of whether or not a delay slot
- // instruction has been fetched
-
for (;
offset < cacheBlkSize &&
numInst < fetchWidth &&
- (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
+ !predicted_branch;
++numInst) {
+ // If we're branching after this instruction, quite fetching
+ // from the same block then.
+ predicted_branch =
+ (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
+ if (predicted_branch) {
+ DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
+ fetch_PC, fetch_NPC);
+ }
+
+
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
@@ -1145,8 +1126,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
#endif
// Create a new DynInst from the instruction fetched.
- DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
- next_PC,
+ DynInstPtr instruction = new DynInst(ext_inst,
+ fetch_PC, fetch_NPC,
+ next_PC, next_NPC,
inst_seq, cpu);
instruction->setTid(tid);
@@ -1158,6 +1140,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
"[sn:%lli]\n",
tid, instruction->readPC(), inst_seq);
+ //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+
DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
tid, instruction->staticInst->disassemble(fetch_PC));
@@ -1166,8 +1150,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
instruction->staticInst,
instruction->readPC());
- predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
- next_NPC);
+ lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
+ predicted_branch |= (next_PC != fetch_NPC);
// Add instruction to the CPU's list of instructions.
instruction->setInstListIt(cpu->addInst(instruction));
@@ -1183,6 +1167,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Move to the next instruction, unless we have a branch.
fetch_PC = next_PC;
+ fetch_NPC = next_NPC;
if (instruction->isQuiesce()) {
DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
@@ -1194,29 +1179,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
}
offset += instSize;
-
-#if ISA_HAS_DELAY_SLOT
- if (predicted_branch) {
- delaySlotInfo[tid].branchSeqNum = inst_seq;
-
- DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
- tid, inst_seq);
- continue;
- } else if (delaySlotInfo[tid].numInsts > 0) {
- --delaySlotInfo[tid].numInsts;
-
- // It's OK to set PC to target of branch
- if (delaySlotInfo[tid].numInsts == 0) {
- delaySlotInfo[tid].targetReady = true;
-
- // Break the looping condition
- predicted_branch = true;
- }
-
- DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
- " process.\n", tid, delaySlotInfo[tid].numInsts);
- }
-#endif
}
if (offset >= cacheBlkSize) {
@@ -1225,7 +1187,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
} else if (numInst >= fetchWidth) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
"for this cycle.\n", tid);
- } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+ } else if (predicted_branch) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
"instruction encountered.\n", tid);
}
@@ -1238,26 +1200,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Now that fetching is completed, update the PC to signify what the next
// cycle will be.
if (fault == NoFault) {
+ PC[tid] = next_PC;
+ nextPC[tid] = next_NPC;
+ nextNPC[tid] = next_NPC + instSize;
#if ISA_HAS_DELAY_SLOT
- if (delaySlotInfo[tid].targetReady &&
- delaySlotInfo[tid].numInsts == 0) {
- // Set PC to target
- PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
- nextPC[tid] = next_PC + instSize; //next_NPC
- nextNPC[tid] = next_PC + (2 * instSize);
-
- delaySlotInfo[tid].targetReady = false;
- } else {
- PC[tid] = next_PC;
- nextPC[tid] = next_NPC;
- nextNPC[tid] = next_NPC + instSize;
- }
-
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
#else
- DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
- PC[tid] = next_PC;
- nextPC[tid] = next_PC + instSize;
+ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
#endif
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1277,10 +1226,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
ext_inst = TheISA::NoopMachInst;
// Create a new DynInst from the dummy nop.
- DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
- next_PC,
+ DynInstPtr instruction = new DynInst(ext_inst,
+ fetch_PC, fetch_NPC,
+ next_PC, next_NPC,
inst_seq, cpu);
- instruction->setPredTarg(next_PC + instSize);
+ instruction->setPredTarg(next_PC, next_NPC);
instruction->setTid(tid);
instruction->setASID(tid);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index a8962f2f7..f24eaf2c4 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -480,23 +480,37 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
toCommit->mispredPC[tid] = inst->readPC();
toCommit->branchMispredict[tid] = true;
+ int instSize = sizeof(TheISA::MachInst);
#if ISA_HAS_DELAY_SLOT
- bool branch_taken = inst->readNextNPC() !=
- (inst->readNextPC() + sizeof(TheISA::MachInst));
+ bool branch_taken =
+ !(inst->readNextPC() + instSize == inst->readNextNPC() &&
+ (inst->readNextPC() == inst->readPC() + instSize ||
+ inst->readNextPC() == inst->readPC() + 2 * instSize));
+ DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
+ branch_taken ? "true": "false", inst->seqNum);
toCommit->branchTaken[tid] = branch_taken;
- toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot();
-
- if (inst->isCondDelaySlot() && branch_taken) {
+ bool squashDelaySlot = true;
+// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
+ DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
+ squashDelaySlot ? "true": "false", inst->seqNum);
+ toCommit->squashDelaySlot[tid] = squashDelaySlot;
+ //If we're squashing the delay slot, we need to pick back up at NextPC.
+ //Otherwise, NextPC isn't being squashed, so we should pick back up at
+ //NextNPC.
+ if (squashDelaySlot) {
toCommit->nextPC[tid] = inst->readNextPC();
+ toCommit->nextNPC[tid] = inst->readNextNPC();
} else {
toCommit->nextPC[tid] = inst->readNextNPC();
+ toCommit->nextNPC[tid] = inst->readNextNPC() + instSize;
}
#else
toCommit->branchTaken[tid] = inst->readNextPC() !=
(inst->readPC() + sizeof(TheISA::MachInst));
toCommit->nextPC[tid] = inst->readNextPC();
+ toCommit->nextNPC[tid] = inst->readNextPC() + instSize;
#endif
toCommit->includeSquashInst[tid] = false;
@@ -514,6 +528,11 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readNextPC();
+#if ISA_HAS_DELAY_SLOT
+ toCommit->nextNPC[tid] = inst->readNextNPC();
+#else
+ toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst);
+#endif
toCommit->branchMispredict[tid] = false;
toCommit->includeSquashInst[tid] = false;
@@ -531,6 +550,11 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readPC();
+#if ISA_HAS_DELAY_SLOT
+ toCommit->nextNPC[tid] = inst->readNextPC();
+#else
+ toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst);
+#endif
toCommit->branchMispredict[tid] = false;
// Must include the broadcasted SN in the squash.
@@ -1341,17 +1365,15 @@ DefaultIEW<Impl>::executeInsts()
fetchRedirect[tid] = true;
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
-#if ISA_HAS_DELAY_SLOT
- DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
- inst->nextNPC);
-#else
- DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
- inst->nextPC);
-#endif
+ DPRINTF(IEW, "Predicted target was %#x, %#x.\n",
+ inst->readPredPC(), inst->readPredNPC());
+ DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x,"
+ " NPC: %#x.\n", inst->readNextPC(),
+ inst->readNextNPC());
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst, tid);
- if (inst->predTaken()) {
+ if (inst->readPredTaken()) {
predictedTakenIncorrect++;
} else {
predictedNotTakenIncorrect++;
@@ -1421,7 +1443,7 @@ DefaultIEW<Impl>::writebackInsts()
// mark scoreboard that this instruction is finally complete.
// Either have IEW have direct access to scoreboard, or have this
// as part of backwards communication.
- for (int inst_num = 0; inst_num < issueWidth &&
+ for (int inst_num = 0; inst_num < wbWidth &&
toCommit->insts[inst_num]; inst_num++) {
DynInstPtr inst = toCommit->insts[inst_num];
int tid = inst->threadNumber;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 14f9d5031..9c7eb7780 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -559,6 +559,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// Cast this to type T?
data = storeQueue[store_idx].data >> shift_amt;
+ // When the data comes from the store queue entry, it's in host
+ // order. When it gets sent to the load, it needs to be in guest
+ // order so when the load converts it again, it ends up back
+ // in host order like the inst expects.
+ data = TheISA::htog(data);
+
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3b84d3411..ebd9301f6 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -597,8 +597,21 @@ LSQUnit<Impl>::writebackStores()
assert(!inst->memData);
inst->memData = new uint8_t[64];
- memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data,
- req->getSize());
+
+ TheISA::IntReg convertedData =
+ TheISA::htog(storeQueue[storeWBIdx].data);
+
+ //FIXME This is a hack to get SPARC working. It, along with endianness
+ //in the memory system in general, need to be straightened out more
+ //formally. The problem is that the data's endianness is swapped when
+ //it's in the 64 bit data field in the store queue. The data that you
+ //want won't start at the beginning of the field anymore unless it was
+ //a 64 bit access.
+ memcpy(inst->memData,
+ (uint8_t *)&convertedData +
+ (TheISA::ByteOrderDiffers ?
+ (sizeof(TheISA::IntReg) - req->getSize()) : 0),
+ req->getSize());
PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
data_pkt->dataStatic(inst->memData);
@@ -612,7 +625,7 @@ LSQUnit<Impl>::writebackStores()
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
storeWBIdx, inst->readPC(),
- req->getPaddr(), *(inst->memData),
+ req->getPaddr(), (int)*(inst->memData),
inst->seqNum);
// @todo: Remove this SC hack once the memory system handles it.
diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh
index 08e9ba483..e7dbd3aba 100644
--- a/src/cpu/o3/mips/cpu_impl.hh
+++ b/src/cpu/o3/mips/cpu_impl.hh
@@ -220,16 +220,6 @@ template <class Impl>
void
MipsO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
{
- // check for error condition.
- if (return_value.successful()) {
- // no error
- this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid);
- this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid);
- } else {
- // got an error, return details
- this->setArchIntReg(TheISA::SyscallSuccessReg,
- (TheISA::IntReg) -1, tid);
- this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid);
- }
+ TheISA::setSyscallReturn(return_value, this->tcBase(tid));
}
#endif
diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh
index 833371e00..f53530908 100755
--- a/src/cpu/o3/mips/dyn_inst.hh
+++ b/src/cpu/o3/mips/dyn_inst.hh
@@ -54,10 +54,6 @@ class MipsDynInst : public BaseDynInst<Impl>
/** Typedef for the CPU. */
typedef typename Impl::O3CPU O3CPU;
- /** Binary machine instruction type. */
- typedef TheISA::MachInst MachInst;
- /** Extended machine instruction type. */
- typedef TheISA::ExtMachInst ExtMachInst;
/** Logical register index type. */
typedef TheISA::RegIndex RegIndex;
/** Integer register index type. */
@@ -74,8 +70,10 @@ class MipsDynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
- MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
- O3CPU *cpu);
+ MipsDynInst(ExtMachInst inst,
+ Addr PC, Addr NPC,
+ Addr Pred_PC, Addr Pred_NPC,
+ InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
MipsDynInst(StaticInstPtr &_staticInst);
@@ -127,22 +125,6 @@ class MipsDynInst : public BaseDynInst<Impl>
/** Calls a syscall. */
void syscall(int64_t callnum);
- private:
- /** Physical register index of the destination registers of this
- * instruction.
- */
- PhysRegIndex _destRegIdx[MaxInstDestRegs];
-
- /** Physical register index of the source registers of this
- * instruction.
- */
- PhysRegIndex _srcRegIdx[MaxInstSrcRegs];
-
- /** Physical register index of the previous producers of the
- * architected destinations.
- */
- PhysRegIndex _prevDestRegIdx[MaxInstDestRegs];
-
public:
// The register accessor methods provide the index of the
@@ -158,28 +140,28 @@ class MipsDynInst : public BaseDynInst<Impl>
uint64_t readIntRegOperand(const StaticInst *si, int idx)
{
- return this->cpu->readIntReg(_srcRegIdx[idx]);
+ return this->cpu->readIntReg(this->_srcRegIdx[idx]);
}
FloatReg readFloatRegOperand(const StaticInst *si, int idx, int width)
{
- return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+ return this->cpu->readFloatReg(this->_srcRegIdx[idx], width);
}
FloatReg readFloatRegOperand(const StaticInst *si, int idx)
{
- return this->cpu->readFloatReg(_srcRegIdx[idx]);
+ return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
}
FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx,
int width)
{
- return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+ return this->cpu->readFloatRegBits(this->_srcRegIdx[idx], width);
}
FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
{
- return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+ return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
}
/** @todo: Make results into arrays so they can handle multiple dest
@@ -187,79 +169,37 @@ class MipsDynInst : public BaseDynInst<Impl>
*/
void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
{
- this->cpu->setIntReg(_destRegIdx[idx], val);
+ this->cpu->setIntReg(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
}
void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val,
int width)
{
- this->cpu->setFloatReg(_destRegIdx[idx], val, width);
+ this->cpu->setFloatReg(this->_destRegIdx[idx], val, width);
BaseDynInst<Impl>::setFloatRegOperand(si, idx, val, width);
}
void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
{
- this->cpu->setFloatReg(_destRegIdx[idx], val);
+ this->cpu->setFloatReg(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
}
void setFloatRegOperandBits(const StaticInst *si, int idx,
FloatRegBits val, int width)
{
- this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
+ this->cpu->setFloatRegBits(this->_destRegIdx[idx], val, width);
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
void setFloatRegOperandBits(const StaticInst *si, int idx,
FloatRegBits val)
{
- this->cpu->setFloatRegBits(_destRegIdx[idx], val);
+ this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
- /** Returns the physical register index of the i'th destination
- * register.
- */
- PhysRegIndex renamedDestRegIdx(int idx) const
- {
- return _destRegIdx[idx];
- }
-
- /** Returns the physical register index of the i'th source register. */
- PhysRegIndex renamedSrcRegIdx(int idx) const
- {
- return _srcRegIdx[idx];
- }
-
- /** Returns the physical register index of the previous physical register
- * that remapped to the same logical register index.
- */
- PhysRegIndex prevDestRegIdx(int idx) const
- {
- return _prevDestRegIdx[idx];
- }
-
- /** Renames a destination register to a physical register. Also records
- * the previous physical register that the logical register mapped to.
- */
- void renameDestReg(int idx,
- PhysRegIndex renamed_dest,
- PhysRegIndex previous_rename)
- {
- _destRegIdx[idx] = renamed_dest;
- _prevDestRegIdx[idx] = previous_rename;
- }
-
- /** Renames a source logical register to the physical register which
- * has/will produce that logical register's result.
- * @todo: add in whether or not the source register is ready.
- */
- void renameSrcReg(int idx, PhysRegIndex renamed_src)
- {
- _srcRegIdx[idx] = renamed_src;
- }
-
public:
/** Calculates EA part of a memory instruction. Currently unused,
* though it may be useful in the future if we want to split
diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh
index 5bc01b9b3..c0f9ae771 100755
--- a/src/cpu/o3/mips/dyn_inst_impl.hh
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@@ -31,9 +31,11 @@
#include "cpu/o3/mips/dyn_inst.hh"
template <class Impl>
-MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
- InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
+ Addr PC, Addr NPC,
+ Addr Pred_PC, Addr Pred_NPC,
+ InstSeqNum seq_num, O3CPU *cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
{
initVars();
}
@@ -53,11 +55,11 @@ MipsDynInst<Impl>::initVars()
// as the normal register entries. It will allow the IQ to work
// without any modifications.
for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
- _destRegIdx[i] = this->staticInst->destRegIdx(i);
+ this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
}
for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
- _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+ this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
this->_readySrcRegIdx[i] = 0;
}
}
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 177b9cb87..6b4628f92 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -411,6 +411,14 @@ class DefaultRename
/** Whether or not rename needs to block this cycle. */
bool blockThisCycle;
+ /** Whether or not rename needs to resume a serialize instruction
+ * after squashing. */
+ bool resumeSerialize;
+
+ /** Whether or not rename needs to resume clearing out the skidbuffer
+ * after squashing. */
+ bool resumeUnblocking;
+
/** The number of threads active in rename. */
unsigned numThreads;
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 3a8e503a0..e303f1cee 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -31,6 +31,8 @@
#include <list>
+#include "arch/isa_traits.hh"
+#include "arch/regfile.hh"
#include "config/full_system.hh"
#include "cpu/o3/rename.hh"
@@ -41,6 +43,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
+ resumeSerialize(false),
+ resumeUnblocking(false),
numThreads(params->numberOfThreads),
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
@@ -332,12 +336,22 @@ DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
// If it still needs to block, the blocking should happen the next
// cycle and there should be space to hold everything due to the squash.
if (renameStatus[tid] == Blocked ||
- renameStatus[tid] == Unblocking ||
- renameStatus[tid] == SerializeStall) {
-
+ renameStatus[tid] == Unblocking) {
toDecode->renameUnblock[tid] = 1;
+ resumeSerialize = false;
serializeInst[tid] = NULL;
+ } else if (renameStatus[tid] == SerializeStall) {
+ if (serializeInst[tid]->seqNum <= squash_seq_num) {
+ DPRINTF(Rename, "Rename will resume serializing after squash\n");
+ resumeSerialize = true;
+ assert(serializeInst[tid]);
+ } else {
+ resumeSerialize = false;
+ toDecode->renameUnblock[tid] = 1;
+
+ serializeInst[tid] = NULL;
+ }
}
// Set the status to Squashing.
@@ -392,6 +406,9 @@ DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
}
slist_it++;
}
+ resumeUnblocking = (skidBuffer[tid].size() != 0);
+ DPRINTF(Rename, "Resume unblocking set to %s\n",
+ resumeUnblocking ? "true" : "false");
#else
skidBuffer[tid].clear();
#endif
@@ -476,6 +493,20 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
++renameSquashCycles;
} else if (renameStatus[tid] == SerializeStall) {
++renameSerializeStallCycles;
+ // If we are currently in SerializeStall and resumeSerialize
+ // was set, then that means that we are resuming serializing
+ // this cycle. Tell the previous stages to block.
+ if (resumeSerialize) {
+ resumeSerialize = false;
+ block(tid);
+ toDecode->renameUnblock[tid] = false;
+ }
+ } else if (renameStatus[tid] == Unblocking) {
+ if (resumeUnblocking) {
+ block(tid);
+ resumeUnblocking = false;
+ toDecode->renameUnblock[tid] = false;
+ }
}
if (renameStatus[tid] == Running ||
@@ -741,7 +772,17 @@ DefaultRename<Impl>::skidInsert(unsigned tid)
}
if (skidBuffer[tid].size() > skidBufferMax)
+ {
+ typename InstQueue::iterator it;
+ warn("Skidbuffer contents:\n");
+ for(it = skidBuffer[tid].begin(); it != skidBuffer[tid].end(); it++)
+ {
+ warn("[tid:%u]: %s [sn:%i].\n", tid,
+ (*it)->staticInst->disassemble(inst->readPC()),
+ (*it)->seqNum);
+ }
panic("Skidbuffer Exceeded Max Size");
+ }
}
template <class Impl>
@@ -830,7 +871,10 @@ DefaultRename<Impl>::block(unsigned tid)
// Only signal backwards to block if the previous stages do not think
// rename is already blocked.
if (renameStatus[tid] != Blocked) {
- if (renameStatus[tid] != Unblocking) {
+ // If resumeUnblocking is set, we unblocked during the squash,
+ // but now we're have unblocking status. We need to tell earlier
+ // stages to block.
+ if (resumeUnblocking || renameStatus[tid] != Unblocking) {
toDecode->renameBlock[tid] = true;
toDecode->renameUnblock[tid] = false;
wroteToTimeBuffer = true;
@@ -963,13 +1007,19 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
// Will need to mark dependencies though.
for (int src_idx = 0; src_idx < num_src_regs; src_idx++) {
RegIndex src_reg = inst->srcRegIdx(src_idx);
+ RegIndex flat_src_reg = src_reg;
+ if (src_reg < TheISA::FP_Base_DepTag) {
+ flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg);
+ DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg);
+ }
+ inst->flattenSrcReg(src_idx, flat_src_reg);
// Look up the source registers to get the phys. register they've
// been renamed to, and set the sources to those registers.
- PhysRegIndex renamed_reg = renameMap[tid]->lookup(src_reg);
+ PhysRegIndex renamed_reg = renameMap[tid]->lookup(flat_src_reg);
DPRINTF(Rename, "[tid:%u]: Looking up arch reg %i, got "
- "physical reg %i.\n", tid, (int)src_reg,
+ "physical reg %i.\n", tid, (int)flat_src_reg,
(int)renamed_reg);
inst->renameSrcReg(src_idx, renamed_reg);
@@ -996,20 +1046,27 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
// Rename the destination registers.
for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) {
RegIndex dest_reg = inst->destRegIdx(dest_idx);
+ RegIndex flat_dest_reg = dest_reg;
+ if (dest_reg < TheISA::FP_Base_DepTag) {
+ flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg);
+ DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg);
+ }
+
+ inst->flattenDestReg(dest_idx, flat_dest_reg);
// Get the physical register that the destination will be
// renamed to.
- rename_result = renameMap[tid]->rename(dest_reg);
+ rename_result = renameMap[tid]->rename(flat_dest_reg);
//Mark Scoreboard entry as not ready
scoreboard->unsetReg(rename_result.first);
DPRINTF(Rename, "[tid:%u]: Renaming arch reg %i to physical "
- "reg %i.\n", tid, (int)dest_reg,
+ "reg %i.\n", tid, (int)flat_dest_reg,
(int)rename_result.first);
// Record the rename information so that a history can be kept.
- RenameHistory hb_entry(inst->seqNum, dest_reg,
+ RenameHistory hb_entry(inst->seqNum, flat_dest_reg,
rename_result.first,
rename_result.second);
@@ -1233,12 +1290,24 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
if (renameStatus[tid] == Squashing) {
// Switch status to running if rename isn't being told to block or
// squash this cycle.
- DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n",
- tid);
+ if (resumeSerialize) {
+ DPRINTF(Rename, "[tid:%u]: Done squashing, switching to serialize.\n",
+ tid);
- renameStatus[tid] = Running;
+ renameStatus[tid] = SerializeStall;
+ return true;
+ } else if (resumeUnblocking) {
+ DPRINTF(Rename, "[tid:%u]: Done squashing, switching to unblocking.\n",
+ tid);
+ renameStatus[tid] = Unblocking;
+ return true;
+ } else {
+ DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n",
+ tid);
- return false;
+ renameStatus[tid] = Running;
+ return false;
+ }
}
if (renameStatus[tid] == SerializeStall) {
diff --git a/src/cpu/o3/sparc/cpu.hh b/src/cpu/o3/sparc/cpu.hh
index c4df79832..08ebd2710 100644
--- a/src/cpu/o3/sparc/cpu.hh
+++ b/src/cpu/o3/sparc/cpu.hh
@@ -37,12 +37,6 @@
#include "cpu/o3/cpu.hh"
#include "sim/byteswap.hh"
-namespace TheISA
-{
- class ITB;
- class DTB;
-}
-
class EndQuiesceEvent;
namespace Kernel {
class Statistics;
@@ -61,14 +55,6 @@ class TranslatingPort;
template <class Impl>
class SparcO3CPU : public FullO3CPU<Impl>
{
- protected:
- typedef TheISA::IntReg IntReg;
- typedef TheISA::FloatReg FloatReg;
- typedef TheISA::FloatRegBits FloatRegBits;
- typedef TheISA::MiscReg MiscReg;
- typedef TheISA::RegFile RegFile;
- typedef TheISA::MiscRegFile MiscRegFile;
-
public:
typedef O3ThreadState<Impl> ImplState;
typedef O3ThreadState<Impl> Thread;
@@ -77,13 +63,6 @@ class SparcO3CPU : public FullO3CPU<Impl>
/** Constructs an AlphaO3CPU with the given parameters. */
SparcO3CPU(Params *params);
-#if FULL_SYSTEM
- /** ITB pointer. */
- SparcISA::ITB *itb;
- /** DTB pointer. */
- SparcISA::DTB *dtb;
-#endif
-
/** Registers statistics. */
void regStats();
@@ -91,19 +70,19 @@ class SparcO3CPU : public FullO3CPU<Impl>
/** Translates instruction requestion. */
Fault translateInstReq(RequestPtr &req, Thread *thread)
{
- return itb->translate(req, thread->getTC());
+ return this->itb->translate(req, thread->getTC());
}
/** Translates data read request. */
Fault translateDataReadReq(RequestPtr &req, Thread *thread)
{
- return dtb->translate(req, thread->getTC(), false);
+ return this->dtb->translate(req, thread->getTC(), false);
}
/** Translates data write request. */
Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
{
- return dtb->translate(req, thread->getTC(), true);
+ return this->dtb->translate(req, thread->getTC(), true);
}
#else
@@ -127,20 +106,21 @@ class SparcO3CPU : public FullO3CPU<Impl>
#endif
/** Reads a miscellaneous register. */
- MiscReg readMiscReg(int misc_reg, unsigned tid);
+ TheISA::MiscReg readMiscReg(int misc_reg, unsigned tid);
/** Reads a misc. register, including any side effects the read
* might have as defined by the architecture.
*/
- MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
+ TheISA::MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
/** Sets a miscellaneous register. */
- void setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
+ void setMiscReg(int misc_reg, const TheISA::MiscReg &val, unsigned tid);
/** Sets a misc. register, including any side effects the write
* might have as defined by the architecture.
*/
- void setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
+ void setMiscRegWithEffect(int misc_reg, const TheISA::MiscReg &val,
+ unsigned tid);
/** Initiates a squash of all in-flight instructions for a given
* thread. The source of the squash is an external update of
@@ -175,10 +155,10 @@ class SparcO3CPU : public FullO3CPU<Impl>
*/
void syscall(int64_t callnum, int tid);
/** Gets a syscall argument. */
- IntReg getSyscallArg(int i, int tid);
+ TheISA::IntReg getSyscallArg(int i, int tid);
/** Used to shift args for indirect syscall. */
- void setSyscallArg(int i, IntReg val, int tid);
+ void setSyscallArg(int i, TheISA::IntReg val, int tid);
/** Sets the return value of a syscall. */
void setSyscallReturn(SyscallReturn return_value, int tid);
@@ -204,4 +184,4 @@ class SparcO3CPU : public FullO3CPU<Impl>
bool lockFlag;
};
-#endif // __CPU_O3_ALPHA_CPU_HH__
+#endif // __CPU_O3_SPARC_CPU_HH__
diff --git a/src/cpu/o3/sparc/cpu_builder.cc b/src/cpu/o3/sparc/cpu_builder.cc
index 81f419ee0..3cac89bad 100644
--- a/src/cpu/o3/sparc/cpu_builder.cc
+++ b/src/cpu/o3/sparc/cpu_builder.cc
@@ -55,8 +55,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
#if FULL_SYSTEM
SimObjectParam<System *> system;
Param<int> cpu_id;
- SimObjectParam<AlphaISA::ITB *> itb;
- SimObjectParam<AlphaISA::DTB *> dtb;
+ SimObjectParam<SparcISA::ITB *> itb;
+ SimObjectParam<SparcISA::DTB *> dtb;
Param<Tick> profile;
Param<bool> do_quiesce;
diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh
index 536a620bf..4a194cbda 100644
--- a/src/cpu/o3/sparc/cpu_impl.hh
+++ b/src/cpu/o3/sparc/cpu_impl.hh
@@ -55,12 +55,7 @@
#endif
template <class Impl>
-SparcO3CPU<Impl>::SparcO3CPU(Params *params)
-#if FULL_SYSTEM
- : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
-#else
- : FullO3CPU<Impl>(params)
-#endif
+SparcO3CPU<Impl>::SparcO3CPU(Params *params) : FullO3CPU<Impl>(params)
{
DPRINTF(O3CPU, "Creating SparcO3CPU object.\n");
@@ -172,15 +167,16 @@ SparcO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, unsigned tid)
template <class Impl>
void
-SparcO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
+SparcO3CPU<Impl>::setMiscReg(int misc_reg,
+ const SparcISA::MiscReg &val, unsigned tid)
{
this->regFile.setMiscReg(misc_reg, val, tid);
}
template <class Impl>
void
-SparcO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
- unsigned tid)
+SparcO3CPU<Impl>::setMiscRegWithEffect(int misc_reg,
+ const SparcISA::MiscReg &val, unsigned tid)
{
this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
}
@@ -285,35 +281,24 @@ template <class Impl>
TheISA::IntReg
SparcO3CPU<Impl>::getSyscallArg(int i, int tid)
{
- return this->readArchIntReg(SparcISA::ArgumentReg0 + i, tid);
+ TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
+ SparcISA::ArgumentReg0 + i);
+ return this->readArchIntReg(idx, tid);
}
template <class Impl>
void
-SparcO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
+SparcO3CPU<Impl>::setSyscallArg(int i, TheISA::IntReg val, int tid)
{
- this->setArchIntReg(SparcISA::ArgumentReg0 + i, val, tid);
+ TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
+ SparcISA::ArgumentReg0 + i);
+ this->setArchIntReg(idx, val, tid);
}
template <class Impl>
void
SparcO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
{
- // check for error condition. SPARC syscall convention is to
- // indicate success/failure in reg the carry bit of the ccr
- // and put the return value itself in the standard return value reg ().
- if (return_value.successful()) {
- // no error, clear XCC.C
- this->setMiscReg(SparcISA::MISCREG_CCR,
- this->readMiscReg(SparcISA::MISCREG_CCR, tid) & 0xEE, tid);
- this->setArchIntReg(SparcISA::ReturnValueReg,
- return_value.value(), tid);
- } else {
- // got an error, set XCC.C
- this->setMiscReg(SparcISA::MISCREG_CCR,
- this->readMiscReg(SparcISA::MISCREG_CCR, tid) | 0x11, tid);
- this->setArchIntReg(SparcISA::ReturnValueReg,
- return_value.value(), tid);
- }
+ TheISA::setSyscallReturn(return_value, this->tcBase(tid));
}
#endif
diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh
index f8d6bb63f..e95ae2fd5 100644
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@@ -32,6 +32,7 @@
#define __CPU_O3_SPARC_DYN_INST_HH__
#include "arch/sparc/isa_traits.hh"
+#include "arch/sparc/types.hh"
#include "cpu/base_dyn_inst.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/sparc/cpu.hh"
@@ -55,8 +56,8 @@ class SparcDynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
- SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
- Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu);
+ SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+ Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
SparcDynInst(StaticInstPtr &_staticInst);
@@ -105,6 +106,45 @@ class SparcDynInst : public BaseDynInst<Impl>
this->threadNumber);
}
+ /** Reads a miscellaneous register. */
+ TheISA::MiscReg readMiscRegOperand(const StaticInst *si, int idx)
+ {
+ return this->cpu->readMiscReg(
+ si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ this->threadNumber);
+ }
+
+ /** Reads a misc. register, including any side-effects the read
+ * might have as defined by the architecture.
+ */
+ TheISA::MiscReg readMiscRegOperandWithEffect(const StaticInst *si, int idx)
+ {
+ return this->cpu->readMiscRegWithEffect(
+ si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ this->threadNumber);
+ }
+
+ /** Sets a misc. register. */
+ void setMiscRegOperand(const StaticInst * si,
+ int idx, const TheISA::MiscReg &val)
+ {
+ this->instResult.integer = val;
+ return this->cpu->setMiscReg(
+ si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ val, this->threadNumber);
+ }
+
+ /** Sets a misc. register, including any side-effects the write
+ * might have as defined by the architecture.
+ */
+ void setMiscRegOperandWithEffect(
+ const StaticInst *si, int idx, const TheISA::MiscReg &val)
+ {
+ return this->cpu->setMiscRegWithEffect(
+ si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+ val, this->threadNumber);
+ }
+
#if FULL_SYSTEM
/** Calls hardware return from error interrupt. */
Fault hwrei();
@@ -116,22 +156,6 @@ class SparcDynInst : public BaseDynInst<Impl>
void syscall(int64_t callnum);
#endif
- private:
- /** Physical register index of the destination registers of this
- * instruction.
- */
- PhysRegIndex _destRegIdx[TheISA::MaxInstDestRegs];
-
- /** Physical register index of the source registers of this
- * instruction.
- */
- PhysRegIndex _srcRegIdx[TheISA::MaxInstSrcRegs];
-
- /** Physical register index of the previous producers of the
- * architected destinations.
- */
- PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs];
-
public:
// The register accessor methods provide the index of the
@@ -145,108 +169,70 @@ class SparcDynInst : public BaseDynInst<Impl>
// storage (which is pretty hard to imagine they would have reason
// to do).
- uint64_t readIntReg(const StaticInst *si, int idx)
+ uint64_t readIntRegOperand(const StaticInst *si, int idx)
{
- return this->cpu->readIntReg(_srcRegIdx[idx]);
+ uint64_t val = this->cpu->readIntReg(this->_srcRegIdx[idx]);
+ DPRINTF(Sparc, "Reading int reg %d (%d, %d) as %x\n", (int)this->_flatSrcRegIdx[idx], (int)this->_srcRegIdx[idx], idx, val);
+ return this->cpu->readIntReg(this->_srcRegIdx[idx]);
}
- TheISA::FloatReg readFloatReg(const StaticInst *si, int idx, int width)
+ TheISA::FloatReg readFloatRegOperand(const StaticInst *si,
+ int idx, int width)
{
- return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+ return this->cpu->readFloatReg(this->_srcRegIdx[idx], width);
}
- TheISA::FloatReg readFloatReg(const StaticInst *si, int idx)
+ TheISA::FloatReg readFloatRegOperand(const StaticInst *si, int idx)
{
- return this->cpu->readFloatReg(_srcRegIdx[idx]);
+ return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
}
- TheISA::FloatRegBits readFloatRegBits(const StaticInst *si,
+ TheISA::FloatRegBits readFloatRegOperandBits(const StaticInst *si,
int idx, int width)
{
- return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+ return this->cpu->readFloatRegBits(this->_srcRegIdx[idx], width);
}
- TheISA::FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
+ TheISA::FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
{
- return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+ return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
}
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/
- void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
{
- this->cpu->setIntReg(_destRegIdx[idx], val);
- BaseDynInst<Impl>::setIntReg(si, idx, val);
+ DPRINTF(Sparc, "Setting int reg %d (%d, %d) to %x\n", (int)this->_flatDestRegIdx[idx], (int)this->_destRegIdx[idx], idx, val);
+ this->cpu->setIntReg(this->_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
}
- void setFloatReg(const StaticInst *si, int idx,
+ void setFloatRegOperand(const StaticInst *si, int idx,
TheISA::FloatReg val, int width)
{
- this->cpu->setFloatReg(_destRegIdx[idx], val, width);
- BaseDynInst<Impl>::setFloatReg(si, idx, val, width);
+ this->cpu->setFloatReg(this->_destRegIdx[idx], val, width);
+ BaseDynInst<Impl>::setFloatRegOperand(si, idx, val, width);
}
- void setFloatReg(const StaticInst *si, int idx, TheISA::FloatReg val)
+ void setFloatRegOperand(const StaticInst *si, int idx, TheISA::FloatReg val)
{
- this->cpu->setFloatReg(_destRegIdx[idx], val);
- BaseDynInst<Impl>::setFloatReg(si, idx, val);
+ this->cpu->setFloatReg(this->_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
}
- void setFloatRegBits(const StaticInst *si, int idx,
+ void setFloatRegOperandBits(const StaticInst *si, int idx,
TheISA::FloatRegBits val, int width)
{
- this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
- BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
+ this->cpu->setFloatRegBits(this->_destRegIdx[idx], val, width);
+ BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
- void setFloatRegBits(const StaticInst *si,
+ void setFloatRegOperandBits(const StaticInst *si,
int idx, TheISA::FloatRegBits val)
{
- this->cpu->setFloatRegBits(_destRegIdx[idx], val);
- BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
- }
-
- /** Returns the physical register index of the i'th destination
- * register.
- */
- PhysRegIndex renamedDestRegIdx(int idx) const
- {
- return _destRegIdx[idx];
- }
-
- /** Returns the physical register index of the i'th source register. */
- PhysRegIndex renamedSrcRegIdx(int idx) const
- {
- return _srcRegIdx[idx];
- }
-
- /** Returns the physical register index of the previous physical register
- * that remapped to the same logical register index.
- */
- PhysRegIndex prevDestRegIdx(int idx) const
- {
- return _prevDestRegIdx[idx];
- }
-
- /** Renames a destination register to a physical register. Also records
- * the previous physical register that the logical register mapped to.
- */
- void renameDestReg(int idx,
- PhysRegIndex renamed_dest,
- PhysRegIndex previous_rename)
- {
- _destRegIdx[idx] = renamed_dest;
- _prevDestRegIdx[idx] = previous_rename;
- }
-
- /** Renames a source logical register to the physical register which
- * has/will produce that logical register's result.
- * @todo: add in whether or not the source register is ready.
- */
- void renameSrcReg(int idx, PhysRegIndex renamed_src)
- {
- _srcRegIdx[idx] = renamed_src;
+ this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
public:
diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh
index 210daace2..c4d30b6f4 100644
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@@ -31,9 +31,10 @@
#include "cpu/o3/sparc/dyn_inst.hh"
template <class Impl>
-SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
- Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
+ Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+ InstSeqNum seq_num, O3CPU *cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
{
initVars();
}
@@ -53,11 +54,11 @@ SparcDynInst<Impl>::initVars()
// as the normal register entries. It will allow the IQ to work
// without any modifications.
for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
- _destRegIdx[i] = this->staticInst->destRegIdx(i);
+ this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
}
for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
- _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+ this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
this->_readySrcRegIdx[i] = 0;
}
}
@@ -126,7 +127,8 @@ template <class Impl>
bool
SparcDynInst<Impl>::simPalCheck(int palFunc)
{
- return this->cpu->simPalCheck(palFunc, this->threadNumber);
+ panic("simPalCheck called, but there's no PAL in SPARC!\n");
+ return false;
}
#else
template <class Impl>
diff --git a/src/cpu/o3/sparc/thread_context.hh b/src/cpu/o3/sparc/thread_context.hh
index 69d1e2d04..7497959e4 100644
--- a/src/cpu/o3/sparc/thread_context.hh
+++ b/src/cpu/o3/sparc/thread_context.hh
@@ -36,12 +36,6 @@ class SparcTC : public O3ThreadContext<Impl>
{
public:
#if FULL_SYSTEM
- /** Returns a pointer to the ITB. */
- virtual SparcISA::ITB *getITBPtr() { return this->cpu->itb; }
-
- /** Returns a pointer to the DTB. */
- virtual SparcISA::DTB *getDTBPtr() { return this->cpu->dtb; }
-
/** Returns pointer to the quiesce event. */
virtual EndQuiesceEvent *getQuiesceEvent()
{
@@ -62,7 +56,7 @@ class SparcTC : public O3ThreadContext<Impl>
virtual void changeRegFileContext(TheISA::RegContextParam param,
TheISA::RegContextVal val)
{
- panic("This doesn't make sense!\n");
+ //XXX Ignore this for now. This -really- needs to get fixed.
}
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index 390569c3d..4987d6eb4 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -66,6 +66,14 @@ class O3ThreadContext : public ThreadContext
/** Pointer to the thread state that this TC corrseponds to. */
O3ThreadState<Impl> *thread;
+#if FULL_SYSTEM
+ /** Returns a pointer to the ITB. */
+ TheISA::ITB *getITBPtr() { return cpu->itb; }
+
+ /** Returns a pointer to the DTB. */
+ TheISA::DTB *getDTBPtr() { return cpu->dtb; }
+#endif
+
/** Returns a pointer to this CPU. */
virtual BaseCPU *getCpuPtr() { return cpu; }
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index afebf294f..af98fa1f5 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -29,6 +29,7 @@
* Korey Sewell
*/
+#include "arch/regfile.hh"
#include "cpu/o3/thread_context.hh"
#include "cpu/quiesce_event.hh"
@@ -305,6 +306,7 @@ template <class Impl>
uint64_t
O3ThreadContext<Impl>::readIntReg(int reg_idx)
{
+ reg_idx = TheISA::flattenIntIndex(this, reg_idx);
return cpu->readArchIntReg(reg_idx, thread->readTid());
}
@@ -349,6 +351,7 @@ template <class Impl>
void
O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val)
{
+ reg_idx = TheISA::flattenIntIndex(this, reg_idx);
cpu->setArchIntReg(reg_idx, val, thread->readTid());
// Squash if we're not already in a state update mode.