summaryrefslogtreecommitdiff
path: root/src/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--src/cpu/o3/alpha/dyn_inst.hh9
-rw-r--r--src/cpu/o3/alpha/dyn_inst_impl.hh19
-rw-r--r--src/cpu/o3/comm.hh26
-rw-r--r--src/cpu/o3/commit.hh34
-rw-r--r--src/cpu/o3/commit_impl.hh86
-rw-r--r--src/cpu/o3/cpu.cc40
-rw-r--r--src/cpu/o3/cpu.hh25
-rw-r--r--src/cpu/o3/decode_impl.hh69
-rw-r--r--src/cpu/o3/fetch.hh20
-rw-r--r--src/cpu/o3/fetch_impl.hh227
-rw-r--r--src/cpu/o3/free_list.hh2
-rw-r--r--src/cpu/o3/iew.hh3
-rw-r--r--src/cpu/o3/iew_impl.hh75
-rw-r--r--src/cpu/o3/inst_queue_impl.hh4
-rwxr-xr-xsrc/cpu/o3/mips/dyn_inst.hh10
-rwxr-xr-xsrc/cpu/o3/mips/dyn_inst_impl.hh18
-rw-r--r--src/cpu/o3/regfile.hh4
-rw-r--r--src/cpu/o3/rename_impl.hh55
-rw-r--r--src/cpu/o3/rename_map.cc4
-rw-r--r--src/cpu/o3/scoreboard.cc15
-rw-r--r--src/cpu/o3/sparc/cpu_impl.hh5
-rw-r--r--src/cpu/o3/sparc/dyn_inst.hh10
-rw-r--r--src/cpu/o3/sparc/dyn_inst_impl.hh17
23 files changed, 334 insertions, 443 deletions
diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh
index 20759d849..a6fb7b885 100644
--- a/src/cpu/o3/alpha/dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@@ -73,8 +73,13 @@ class AlphaDynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
- AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
- Addr Pred_PC, Addr Pred_NPC,
+ AlphaDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu);
+
+ /** BaseDynInst constructor given a binary instruction. */
+ AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh
index fdce1ade5..6dfe0ccdd 100644
--- a/src/cpu/o3/alpha/dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@@ -31,10 +31,25 @@
#include "cpu/o3/alpha/dyn_inst.hh"
template <class Impl>
-AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr staticInst,
+ Addr PC, Addr NPC, Addr microPC,
Addr Pred_PC, Addr Pred_NPC,
+ Addr Pred_MicroPC,
InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+ : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
+ Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
+{
+ initVars();
+}
+
+template <class Impl>
+AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC,
+ Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, microPC,
+ Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
{
initVars();
}
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index d96919007..fb772060b 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -87,10 +87,10 @@ struct DefaultIEWDefaultCommit {
bool squash[Impl::MaxThreads];
bool branchMispredict[Impl::MaxThreads];
bool branchTaken[Impl::MaxThreads];
- bool squashDelaySlot[Impl::MaxThreads];
- uint64_t mispredPC[Impl::MaxThreads];
- uint64_t nextPC[Impl::MaxThreads];
- uint64_t nextNPC[Impl::MaxThreads];
+ Addr mispredPC[Impl::MaxThreads];
+ Addr nextPC[Impl::MaxThreads];
+ Addr nextNPC[Impl::MaxThreads];
+ Addr nextMicroPC[Impl::MaxThreads];
InstSeqNum squashedSeqNum[Impl::MaxThreads];
bool includeSquashInst[Impl::MaxThreads];
@@ -114,15 +114,15 @@ struct TimeBufStruct {
uint64_t branchAddr;
InstSeqNum doneSeqNum;
- InstSeqNum bdelayDoneSeqNum;
// @todo: Might want to package this kind of branch stuff into a single
// struct as it is used pretty frequently.
bool branchMispredict;
bool branchTaken;
- uint64_t mispredPC;
- uint64_t nextPC;
- uint64_t nextNPC;
+ Addr mispredPC;
+ Addr nextPC;
+ Addr nextNPC;
+ Addr nextMicroPC;
unsigned branchCount;
};
@@ -160,18 +160,16 @@ struct TimeBufStruct {
bool branchMispredict;
bool branchTaken;
- uint64_t mispredPC;
- uint64_t nextPC;
- uint64_t nextNPC;
+ Addr mispredPC;
+ Addr nextPC;
+ Addr nextNPC;
+ Addr nextMicroPC;
// Represents the instruction that has either been retired or
// squashed. Similar to having a single bus that broadcasts the
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
- InstSeqNum bdelayDoneSeqNum;
- bool squashDelaySlot;
-
//Just in case we want to do a commit/squash on a cycle
//(necessary for multiple ROBs?)
bool commitInsts;
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index fba618c14..27bdd20c5 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -279,25 +279,37 @@ class DefaultCommit
/** Returns the PC of the head instruction of the ROB.
* @todo: Probably remove this function as it returns only thread 0.
*/
- uint64_t readPC() { return PC[0]; }
+ Addr readPC() { return PC[0]; }
/** Returns the PC of a specific thread. */
- uint64_t readPC(unsigned tid) { return PC[tid]; }
+ Addr readPC(unsigned tid) { return PC[tid]; }
/** Sets the PC of a specific thread. */
- void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
+ void setPC(Addr val, unsigned tid) { PC[tid] = val; }
+
+ /** Reads the micro PC of a specific thread. */
+ Addr readMicroPC(unsigned tid) { return microPC[tid]; }
+
+ /** Sets the micro PC of a specific thread */
+ void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; }
/** Reads the next PC of a specific thread. */
- uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
+ Addr readNextPC(unsigned tid) { return nextPC[tid]; }
/** Sets the next PC of a specific thread. */
- void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
+ void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; }
/** Reads the next NPC of a specific thread. */
- uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
+ Addr readNextNPC(unsigned tid) { return nextNPC[tid]; }
/** Sets the next NPC of a specific thread. */
- void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
+ void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; }
+
+ /** Reads the micro PC of a specific thread. */
+ Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; }
+
+ /** Sets the micro PC of a specific thread */
+ void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; }
private:
/** Time buffer interface. */
@@ -402,12 +414,20 @@ class DefaultCommit
*/
Addr PC[Impl::MaxThreads];
+ /** The commit micro PC of each thread. Refers to the instruction that
+ * is currently being processed/committed.
+ */
+ Addr microPC[Impl::MaxThreads];
+
/** The next PC of each thread. */
Addr nextPC[Impl::MaxThreads];
/** The next NPC of each thread. */
Addr nextNPC[Impl::MaxThreads];
+ /** The next micro PC of each thread. */
+ Addr nextMicroPC[Impl::MaxThreads];
+
/** The sequence number of the youngest valid instruction in the ROB. */
InstSeqNum youngestSeqNum[Impl::MaxThreads];
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index dd4c333d3..9411c6c62 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -124,7 +124,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, Params *params)
committedStores[i] = false;
trapSquash[i] = false;
tcSquash[i] = false;
- PC[i] = nextPC[i] = nextNPC[i] = 0;
+ microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0;
}
#if FULL_SYSTEM
interrupt = NoFault;
@@ -508,6 +508,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
toIEW->commitInfo[tid].nextPC = PC[tid];
toIEW->commitInfo[tid].nextNPC = nextPC[tid];
+ toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid];
}
template <class Impl>
@@ -738,38 +739,15 @@ DefaultCommit<Impl>::commit()
// then use one older sequence number.
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
-#if ISA_HAS_DELAY_SLOT
- InstSeqNum bdelay_done_seq_num = squashed_inst;
- bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid];
- bool branchMispredict = fromIEW->branchMispredict[tid];
-
- // Squashing/not squashing the branch delay slot only makes
- // sense when you're squashing from a branch, ie from a branch
- // mispredict.
- if (branchMispredict && !squash_bdelay_slot) {
- bdelay_done_seq_num++;
- }
-#endif
-
if (fromIEW->includeSquashInst[tid] == true) {
squashed_inst--;
-#if ISA_HAS_DELAY_SLOT
- bdelay_done_seq_num--;
-#endif
}
// All younger instructions will be squashed. Set the sequence
// number as the youngest instruction in the ROB.
youngestSeqNum[tid] = squashed_inst;
-#if ISA_HAS_DELAY_SLOT
- rob->squash(bdelay_done_seq_num, tid);
- toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot;
- toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num;
-#else
rob->squash(squashed_inst, tid);
- toIEW->commitInfo[tid].squashDelaySlot = true;
-#endif
changedROBNumEntries[tid] = true;
toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
@@ -788,6 +766,7 @@ DefaultCommit<Impl>::commit()
toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
+ toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid];
toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
@@ -806,10 +785,6 @@ DefaultCommit<Impl>::commit()
// Try to commit any instructions.
commitInsts();
- } else {
-#if ISA_HAS_DELAY_SLOT
- skidInsert();
-#endif
}
//Check for any activity
@@ -901,6 +876,7 @@ DefaultCommit<Impl>::commitInsts()
PC[tid] = head_inst->readPC();
nextPC[tid] = head_inst->readNextPC();
nextNPC[tid] = head_inst->readNextNPC();
+ nextMicroPC[tid] = head_inst->readNextMicroPC();
// Increment the total number of non-speculative instructions
// executed.
@@ -929,12 +905,10 @@ DefaultCommit<Impl>::commitInsts()
}
PC[tid] = nextPC[tid];
-#if ISA_HAS_DELAY_SLOT
nextPC[tid] = nextNPC[tid];
nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
-#else
- nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
-#endif
+ microPC[tid] = nextMicroPC[tid];
+ nextMicroPC[tid] = microPC[tid] + 1;
#if FULL_SYSTEM
int count = 0;
@@ -1161,37 +1135,13 @@ DefaultCommit<Impl>::getInsts()
{
DPRINTF(Commit, "Getting instructions from Rename stage.\n");
-#if ISA_HAS_DELAY_SLOT
- // Read any renamed instructions and place them into the ROB.
- int insts_to_process = std::min((int)renameWidth,
- (int)(fromRename->size + skidBuffer.size()));
- int rename_idx = 0;
-
- DPRINTF(Commit, "%i insts available to process. Rename Insts:%i "
- "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size,
- skidBuffer.size());
-#else
// Read any renamed instructions and place them into the ROB.
int insts_to_process = std::min((int)renameWidth, fromRename->size);
-#endif
-
for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
DynInstPtr inst;
-#if ISA_HAS_DELAY_SLOT
- // Get insts from skidBuffer or from Rename
- if (skidBuffer.size() > 0) {
- DPRINTF(Commit, "Grabbing skidbuffer inst.\n");
- inst = skidBuffer.front();
- skidBuffer.pop();
- } else {
- DPRINTF(Commit, "Grabbing rename inst.\n");
- inst = fromRename->insts[rename_idx++];
- }
-#else
inst = fromRename->insts[inst_num];
-#endif
int tid = inst->threadNumber;
if (!inst->isSquashed() &&
@@ -1213,30 +1163,6 @@ DefaultCommit<Impl>::getInsts()
inst->readPC(), inst->seqNum, tid);
}
}
-
-#if ISA_HAS_DELAY_SLOT
- if (rename_idx < fromRename->size) {
- DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n");
-
- for (;
- rename_idx < fromRename->size;
- rename_idx++) {
- DynInstPtr inst = fromRename->insts[rename_idx];
-
- if (!inst->isSquashed()) {
- DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
- "skidBuffer.\n", inst->readPC(), inst->seqNum,
- inst->threadNumber);
- skidBuffer.push(inst);
- } else {
- DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
- "squashed, skipping.\n",
- inst->readPC(), inst->seqNum, inst->threadNumber);
- }
- }
- }
-#endif
-
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index a775b66d5..8e4c625df 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -696,7 +696,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
// Squash Throughout Pipeline
InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
- fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
+ fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid);
decode.squash(tid);
rename.squash(squash_seq_num, tid);
iew.squash(tid);
@@ -1152,6 +1152,20 @@ FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
template <class Impl>
uint64_t
+FullO3CPU<Impl>::readMicroPC(unsigned tid)
+{
+ return commit.readMicroPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setMicroPC(Addr new_PC,unsigned tid)
+{
+ commit.setMicroPC(new_PC, tid);
+}
+
+template <class Impl>
+uint64_t
FullO3CPU<Impl>::readNextPC(unsigned tid)
{
return commit.readNextPC(tid);
@@ -1179,6 +1193,20 @@ FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
}
template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readNextMicroPC(unsigned tid)
+{
+ return commit.readNextMicroPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setNextMicroPC(Addr new_PC,unsigned tid)
+{
+ commit.setNextMicroPC(new_PC, tid);
+}
+
+template <class Impl>
typename FullO3CPU<Impl>::ListIt
FullO3CPU<Impl>::addInst(DynInstPtr &inst)
{
@@ -1226,9 +1254,7 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
template <class Impl>
void
-FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
- bool squash_delay_slot,
- const InstSeqNum &delay_slot_seq_num)
+FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
{
DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
" list.\n", tid);
@@ -1259,12 +1285,6 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
while (inst_it != end_it) {
assert(!instList.empty());
-#if ISA_HAS_DELAY_SLOT
- if(!squash_delay_slot &&
- delay_slot_seq_num >= (*inst_it)->seqNum) {
- break;
- }
-#endif
squashInstIt(inst_it, tid);
inst_it--;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index e71d05c8e..bff78bf9e 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
/** Reads the commit PC of a specific thread. */
- uint64_t readPC(unsigned tid);
+ Addr readPC(unsigned tid);
/** Sets the commit PC of a specific thread. */
void setPC(Addr new_PC, unsigned tid);
+ /** Reads the commit micro PC of a specific thread. */
+ Addr readMicroPC(unsigned tid);
+
+ /** Sets the commmit micro PC of a specific thread. */
+ void setMicroPC(Addr new_microPC, unsigned tid);
+
/** Reads the next PC of a specific thread. */
- uint64_t readNextPC(unsigned tid);
+ Addr readNextPC(unsigned tid);
/** Sets the next PC of a specific thread. */
- void setNextPC(uint64_t val, unsigned tid);
+ void setNextPC(Addr val, unsigned tid);
/** Reads the next NPC of a specific thread. */
- uint64_t readNextNPC(unsigned tid);
+ Addr readNextNPC(unsigned tid);
/** Sets the next NPC of a specific thread. */
- void setNextNPC(uint64_t val, unsigned tid);
+ void setNextNPC(Addr val, unsigned tid);
+
+ /** Reads the commit next micro PC of a specific thread. */
+ Addr readNextMicroPC(unsigned tid);
+
+ /** Sets the commit next micro PC of a specific thread. */
+ void setNextMicroPC(Addr val, unsigned tid);
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
@@ -468,8 +480,7 @@ class FullO3CPU : public BaseO3CPU
/** Remove all instructions that are not currently in the ROB.
* There's also an option to not squash delay slot instructions.*/
- void removeInstsNotInROB(unsigned tid, bool squash_delay_slot,
- const InstSeqNum &delay_slot_seq_num);
+ void removeInstsNotInROB(unsigned tid);
/** Remove all instructions younger than the given sequence number. */
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 314864f94..ce6738456 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -49,8 +49,6 @@ DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, Params *params)
stalls[i].rename = false;
stalls[i].iew = false;
stalls[i].commit = false;
-
- squashAfterDelaySlot[i] = false;
}
// @todo: Make into a parameter
@@ -275,20 +273,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
///explicitly for ISAs with delay slots.
toFetch->decodeInfo[tid].nextNPC =
inst->branchTarget() + sizeof(TheISA::MachInst);
+ toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
#if ISA_HAS_DELAY_SLOT
toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
(inst->readNextPC() + sizeof(TheISA::MachInst));
-
- toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid];
- squashAfterDelaySlot[tid] = false;
-
- InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid];
#else
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
+#endif
InstSeqNum squash_seq_num = inst->seqNum;
-#endif
// Might have to tell fetch to unblock.
if (decodeStatus[tid] == Blocked ||
@@ -309,30 +303,10 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
// Clear the instruction list and skid buffer in case they have any
// insts in them.
while (!insts[tid].empty()) {
-
-#if ISA_HAS_DELAY_SLOT
- if (insts[tid].front()->seqNum <= squash_seq_num) {
- DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode "
- "instructions before delay slot [sn:%i]. %i insts"
- "left in decode.\n", tid, squash_seq_num,
- insts[tid].size());
- break;
- }
-#endif
insts[tid].pop();
}
while (!skidBuffer[tid].empty()) {
-
-#if ISA_HAS_DELAY_SLOT
- if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
- DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer "
- "instructions before delay slot [sn:%i]. %i insts"
- "left in decode.\n", tid, squash_seq_num,
- insts[tid].size());
- break;
- }
-#endif
skidBuffer[tid].pop();
}
@@ -760,48 +734,13 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Might want to set some sort of boolean and just do
// a check at the end
-#if !ISA_HAS_DELAY_SLOT
squash(inst, inst->threadNumber);
Addr target = inst->branchTarget();
- inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
+ //The micro pc after an instruction level branch should be 0
+ inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
break;
-#else
- // If mispredicted as taken, then ignore delay slot
- // instruction... else keep delay slot and squash
- // after it is sent to rename
- if (inst->readPredTaken() && inst->isCondDelaySlot()) {
- DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
- "[sn:%i] PC %#x mispredicted as taken.\n", tid,
- inst->seqNum, inst->PC);
- bdelayDoneSeqNum[tid] = inst->seqNum;
- squash(inst, inst->threadNumber);
- Addr target = inst->branchTarget();
- inst->setPredTarg(target,
- target + sizeof(TheISA::MachInst));
- break;
- } else {
- DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
- "[sn:%i] PC %#x, will squash after delay slot "
- "inst. is sent to Rename\n",
- tid, inst->seqNum, inst->PC);
- bdelayDoneSeqNum[tid] = inst->seqNum + 1;
- squashAfterDelaySlot[tid] = true;
- squashInst[tid] = inst;
- continue;
- }
-#endif
}
}
-
- if (squashAfterDelaySlot[tid]) {
- assert(!inst->isSquashed());
- squash(squashInst[tid], squashInst[tid]->threadNumber);
- Addr target = squashInst[tid]->branchTarget();
- squashInst[tid]->setPredTarg(target,
- target + sizeof(TheISA::MachInst));
- assert(!inst->isSquashed());
- break;
- }
}
// If we didn't process all instructions, then we will need to block
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index d9b0a47bd..d954bd1e7 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -227,7 +227,7 @@ class DefaultFetch
* @param next_NPC Used for ISAs which use delay slots.
* @return Whether or not a branch was predicted as taken.
*/
- bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
+ bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC);
/**
* Fetches the cache line that contains fetch_PC. Returns any
@@ -242,12 +242,14 @@ class DefaultFetch
bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
/** Squashes a specific thread and resets the PC. */
- inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
+ inline void doSquash(const Addr &new_PC, const Addr &new_NPC,
+ const Addr &new_MicroPC, unsigned tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode that should be sqaushed.
*/
void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+ const Addr &new_MicroPC,
const InstSeqNum &seq_num, unsigned tid);
/** Checks if a thread is stalled. */
@@ -263,8 +265,8 @@ class DefaultFetch
* squash should be the commit stage.
*/
void squash(const Addr &new_PC, const Addr &new_NPC,
- const InstSeqNum &seq_num,
- bool squash_delay_slot, unsigned tid);
+ const Addr &new_MicroPC,
+ const InstSeqNum &seq_num, unsigned tid);
/** Ticks the fetch stage, processing all inputs signals and fetching
* as many instructions as possible.
@@ -347,16 +349,12 @@ class DefaultFetch
/** Per-thread fetch PC. */
Addr PC[Impl::MaxThreads];
+ /** Per-thread fetch micro PC. */
+ Addr microPC[Impl::MaxThreads];
+
/** Per-thread next PC. */
Addr nextPC[Impl::MaxThreads];
- /** Per-thread next Next PC.
- * This is not a real register but is used for
- * architectures that use a branch-delay slot.
- * (such as MIPS or Sparc)
- */
- Addr nextNPC[Impl::MaxThreads];
-
/** Memory request used to access cache. */
RequestPtr memReq[Impl::MaxThreads];
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index ab55ec744..0fd1e7bac 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -313,7 +313,7 @@ DefaultFetch<Impl>::initStage()
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
- nextNPC[tid] = cpu->readNextNPC(tid);
+ microPC[tid] = cpu->readMicroPC(tid);
}
for (int tid=0; tid < numThreads; tid++) {
@@ -440,11 +440,7 @@ DefaultFetch<Impl>::takeOverFrom()
stalls[i].commit = 0;
PC[i] = cpu->readPC(i);
nextPC[i] = cpu->readNextPC(i);
-#if ISA_HAS_DELAY_SLOT
- nextNPC[i] = cpu->readNextNPC(i);
-#else
- nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
-#endif
+ microPC[i] = cpu->readMicroPC(i);
fetchStatus[i] = Running;
}
numInst = 0;
@@ -494,7 +490,7 @@ DefaultFetch<Impl>::switchToInactive()
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
- Addr &next_NPC)
+ Addr &next_NPC, Addr &next_MicroPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
@@ -502,13 +498,22 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
bool predict_taken;
if (!inst->isControl()) {
- next_PC = next_NPC;
- next_NPC = next_NPC + instSize;
- inst->setPredTarg(next_PC, next_NPC);
+ if (inst->isMicroOp() && !inst->isLastMicroOp()) {
+ next_MicroPC++;
+ } else {
+ next_PC = next_NPC;
+ next_NPC = next_NPC + instSize;
+ next_MicroPC = 0;
+ }
+ inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
inst->setPredTaken(false);
return false;
}
+ //Assume for now that all control flow is to a different macroop which
+ //would reset the micro pc to 0.
+ next_MicroPC = 0;
+
int tid = inst->threadNumber;
Addr pred_PC = next_PC;
predict_taken = branchPred.predict(inst, pred_PC, tid);
@@ -535,7 +540,7 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
#endif
/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
tid, next_PC, next_NPC);*/
- inst->setPredTarg(next_PC, next_NPC);
+ inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
inst->setPredTaken(predict_taken);
++fetchedBranches;
@@ -659,14 +664,14 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const Addr &new_PC,
- const Addr &new_NPC, unsigned tid)
+ const Addr &new_NPC, const Addr &new_microPC, unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
tid, new_PC, new_NPC);
PC[tid] = new_PC;
nextPC[tid] = new_NPC;
- nextNPC[tid] = new_NPC + instSize;
+ microPC[tid] = new_microPC;
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -694,12 +699,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
- const InstSeqNum &seq_num,
- unsigned tid)
+ const Addr &new_MicroPC,
+ const InstSeqNum &seq_num, unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
- doSquash(new_PC, new_NPC, tid);
+ doSquash(new_PC, new_NPC, new_MicroPC, tid);
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
@@ -775,20 +780,15 @@ DefaultFetch<Impl>::updateFetchStatus()
template <class Impl>
void
DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
- const InstSeqNum &seq_num,
- bool squash_delay_slot, unsigned tid)
+ const Addr &new_MicroPC,
+ const InstSeqNum &seq_num, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
- doSquash(new_PC, new_NPC, tid);
+ doSquash(new_PC, new_NPC, new_MicroPC, tid);
-#if ISA_HAS_DELAY_SLOT
// Tell the CPU to remove any instructions that are not in the ROB.
- cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
-#else
- // Tell the CPU to remove any instructions that are not in the ROB.
- cpu->removeInstsNotInROB(tid, true, 0);
-#endif
+ cpu->removeInstsNotInROB(tid);
}
template <class Impl>
@@ -897,17 +897,11 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
"from commit.\n",tid);
-
-#if ISA_HAS_DELAY_SLOT
- InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-#else
- InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
-#endif
// In any case, squash.
squash(fromCommit->commitInfo[tid].nextPC,
fromCommit->commitInfo[tid].nextNPC,
- doneSeqNum,
- fromCommit->commitInfo[tid].squashDelaySlot,
+ fromCommit->commitInfo[tid].nextMicroPC,
+ fromCommit->commitInfo[tid].doneSeqNum,
tid);
// Also check if there's a mispredict that happened.
@@ -956,18 +950,14 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
if (fetchStatus[tid] != Squashing) {
-#if ISA_HAS_DELAY_SLOT
- InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
-#else
- InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
-#endif
DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].nextNPC);
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].nextNPC,
- doneSeqNum,
+ fromDecode->decodeInfo[tid].nextMicroPC,
+ fromDecode->decodeInfo[tid].doneSeqNum,
tid);
return true;
@@ -1021,9 +1011,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
// The current PC.
- Addr &fetch_PC = PC[tid];
-
- Addr &fetch_NPC = nextPC[tid];
+ Addr fetch_PC = PC[tid];
+ Addr fetch_NPC = nextPC[tid];
+ Addr fetch_MicroPC = microPC[tid];
// Fault code for memory access.
Fault fault = NoFault;
@@ -1082,6 +1072,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
Addr next_PC = fetch_PC;
Addr next_NPC = fetch_NPC;
+ Addr next_MicroPC = fetch_MicroPC;
InstSeqNum inst_seq;
MachInst inst;
@@ -1089,6 +1080,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// @todo: Fix this hack.
unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
+ StaticInstPtr staticInst = NULL;
+ StaticInstPtr macroop = NULL;
+
if (fault == NoFault) {
// If the read of the first instruction was successful, then grab the
// instructions from the rest of the cache line and put them into the
@@ -1101,11 +1095,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// ended this fetch block.
bool predicted_branch = false;
- for (;
- offset < cacheBlkSize &&
- numInst < fetchWidth &&
- !predicted_branch;
- ++numInst) {
+ while (offset < cacheBlkSize &&
+ numInst < fetchWidth &&
+ !predicted_branch) {
// If we're branching after this instruction, quite fetching
// from the same block then.
@@ -1116,95 +1108,107 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC, fetch_NPC);
}
-
- // Get a sequence number.
- inst_seq = cpu->getAndIncrementInstSeq();
-
// Make sure this is a valid index.
assert(offset <= cacheBlkSize - instSize);
- // Get the instruction from the array of the cache line.
- inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
- (&cacheData[tid][offset]));
+ if (!macroop) {
+ // Get the instruction from the array of the cache line.
+ inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
+ (&cacheData[tid][offset]));
- predecoder.setTC(cpu->thread[tid]->getTC());
- predecoder.moreBytes(fetch_PC, fetch_PC, 0, inst);
+ predecoder.setTC(cpu->thread[tid]->getTC());
+ predecoder.moreBytes(fetch_PC, fetch_PC, 0, inst);
- ext_inst = predecoder.getExtMachInst();
+ ext_inst = predecoder.getExtMachInst();
+ staticInst = StaticInstPtr(ext_inst);
+ if (staticInst->isMacroOp())
+ macroop = staticInst;
+ }
+ do {
+ if (macroop) {
+ staticInst = macroop->fetchMicroOp(fetch_MicroPC);
+ if (staticInst->isLastMicroOp())
+ macroop = NULL;
+ }
- // Create a new DynInst from the instruction fetched.
- DynInstPtr instruction = new DynInst(ext_inst,
- fetch_PC, fetch_NPC,
- next_PC, next_NPC,
- inst_seq, cpu);
- instruction->setTid(tid);
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
- instruction->setASID(tid);
+ // Create a new DynInst from the instruction fetched.
+ DynInstPtr instruction = new DynInst(staticInst,
+ fetch_PC, fetch_NPC, fetch_MicroPC,
+ next_PC, next_NPC, next_MicroPC,
+ inst_seq, cpu);
+ instruction->setTid(tid);
- instruction->setThreadState(cpu->thread[tid]);
+ instruction->setASID(tid);
- DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
- "[sn:%lli]\n",
- tid, instruction->readPC(), inst_seq);
+ instruction->setThreadState(cpu->thread[tid]);
- //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+ DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
+ "[sn:%lli]\n",
+ tid, instruction->readPC(), inst_seq);
- DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
- tid, instruction->staticInst->disassemble(fetch_PC));
+ //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+
+ DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
+ tid, instruction->staticInst->disassemble(fetch_PC));
#if TRACING_ON
- instruction->traceData =
- Trace::getInstRecord(curTick, cpu->tcBase(tid),
- instruction->staticInst,
- instruction->readPC());
+ instruction->traceData =
+ Trace::getInstRecord(curTick, cpu->tcBase(tid),
+ instruction->staticInst,
+ instruction->readPC());
#else
- instruction->traceData = NULL;
+ instruction->traceData = NULL;
#endif
- ///FIXME This needs to be more robust in dealing with delay slots
-#if !ISA_HAS_DELAY_SLOT
-// predicted_branch |=
-#endif
- lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
- predicted_branch |= (next_PC != fetch_NPC);
+ ///FIXME This needs to be more robust in dealing with delay slots
+ predicted_branch |=
+ lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
- // Add instruction to the CPU's list of instructions.
- instruction->setInstListIt(cpu->addInst(instruction));
+ // Add instruction to the CPU's list of instructions.
+ instruction->setInstListIt(cpu->addInst(instruction));
- // Write the instruction to the first slot in the queue
- // that heads to decode.
- toDecode->insts[numInst] = instruction;
+ // Write the instruction to the first slot in the queue
+ // that heads to decode.
+ toDecode->insts[numInst] = instruction;
- toDecode->size++;
+ toDecode->size++;
- // Increment stat of fetched instructions.
- ++fetchedInsts;
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
- // Move to the next instruction, unless we have a branch.
- fetch_PC = next_PC;
- fetch_NPC = next_NPC;
+ // Move to the next instruction, unless we have a branch.
+ fetch_PC = next_PC;
+ fetch_NPC = next_NPC;
+ fetch_MicroPC = next_MicroPC;
- if (instruction->isQuiesce()) {
- DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
- curTick);
- fetchStatus[tid] = QuiescePending;
- ++numInst;
- status_change = true;
- break;
- }
+ if (instruction->isQuiesce()) {
+ DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
+ curTick);
+ fetchStatus[tid] = QuiescePending;
+ ++numInst;
+ status_change = true;
+ break;
+ }
+ ++numInst;
+ } while (staticInst->isMicroOp() &&
+ !staticInst->isLastMicroOp() &&
+ numInst < fetchWidth);
offset += instSize;
}
- if (offset >= cacheBlkSize) {
- DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
- "block.\n", tid);
+ if (predicted_branch) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+ "instruction encountered.\n", tid);
} else if (numInst >= fetchWidth) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
"for this cycle.\n", tid);
- } else if (predicted_branch) {
- DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
- "instruction encountered.\n", tid);
+ } else if (offset >= cacheBlkSize) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+ "block.\n", tid);
}
}
@@ -1217,12 +1221,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
if (fault == NoFault) {
PC[tid] = next_PC;
nextPC[tid] = next_NPC;
- nextNPC[tid] = next_NPC + instSize;
-#if ISA_HAS_DELAY_SLOT
- DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
-#else
+ microPC[tid] = next_MicroPC;
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
-#endif
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
// miss)
@@ -1240,8 +1240,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// We will use a nop in order to carry the fault.
ext_inst = TheISA::NoopMachInst;
+ StaticInstPtr staticInst = new StaticInst(ext_inst);
// Create a new DynInst from the dummy nop.
- DynInstPtr instruction = new DynInst(ext_inst,
+ DynInstPtr instruction = new DynInst(staticInst,
fetch_PC, fetch_NPC,
next_PC, next_NPC,
inst_seq, cpu);
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index c669b0b34..42fc0c533 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -168,7 +168,9 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg)
if (freed_reg != TheISA::ZeroReg)
freeIntRegs.push(freed_reg);
} else if (freed_reg < numPhysicalRegs) {
+#if THE_ISA == ALPHA_ISA
if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs))
+#endif
freeFloatRegs.push(freed_reg);
}
}
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index ce2991cfb..eef5a15d2 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -402,9 +402,6 @@ class DefaultIEW
/** Records if there is a fetch redirect on this cycle for each thread. */
bool fetchRedirect[Impl::MaxThreads];
- /** Keeps track of the last valid branch delay slot instss for threads */
- InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads];
-
/** Used to track if all instructions have been dispatched this cycle.
* If they have not, then blocking must have occurred, and the instructions
* would already be added to the skid buffer.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 62e656e93..399c44909 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -69,7 +69,6 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, Params *params)
dispatchStatus[i] = Running;
stalls[i].commit = false;
fetchRedirect[i] = false;
- bdelayDoneSeqNum[i] = 0;
}
wbMax = wbWidth * params->wbDepth;
@@ -410,31 +409,14 @@ DefaultIEW<Impl>::squash(unsigned tid)
instQueue.squash(tid);
// Tell the LDSTQ to start squashing.
-#if ISA_HAS_DELAY_SLOT
- ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid);
-#else
ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
-#endif
updatedQueues = true;
// Clear the skid buffer in case it has any data in it.
DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n",
- tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum);
+ tid, fromCommit->commitInfo[tid].doneSeqNum);
while (!skidBuffer[tid].empty()) {
-#if ISA_HAS_DELAY_SLOT
- if (skidBuffer[tid].front()->seqNum <=
- fromCommit->commitInfo[tid].bdelayDoneSeqNum) {
- DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions "
- "that occur before delay slot [sn:%i].\n",
- fromCommit->commitInfo[tid].bdelayDoneSeqNum,
- tid);
- break;
- } else {
- DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from "
- "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum);
- }
-#endif
if (skidBuffer[tid].front()->isLoad() ||
skidBuffer[tid].front()->isStore() ) {
toRename->iewInfo[tid].dispatchedToLSQ++;
@@ -445,8 +427,6 @@ DefaultIEW<Impl>::squash(unsigned tid)
skidBuffer[tid].pop();
}
- bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-
emptyRenameInsts(tid);
}
@@ -462,38 +442,19 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
toCommit->mispredPC[tid] = inst->readPC();
toCommit->branchMispredict[tid] = true;
- int instSize = sizeof(TheISA::MachInst);
#if ISA_HAS_DELAY_SLOT
- bool branch_taken =
+ int instSize = sizeof(TheISA::MachInst);
+ toCommit->branchTaken[tid] =
!(inst->readNextPC() + instSize == inst->readNextNPC() &&
(inst->readNextPC() == inst->readPC() + instSize ||
inst->readNextPC() == inst->readPC() + 2 * instSize));
- DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
- branch_taken ? "true": "false", inst->seqNum);
-
- toCommit->branchTaken[tid] = branch_taken;
-
- bool squashDelaySlot = true;
-// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
- DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
- squashDelaySlot ? "true": "false", inst->seqNum);
- toCommit->squashDelaySlot[tid] = squashDelaySlot;
- //If we're squashing the delay slot, we need to pick back up at NextPC.
- //Otherwise, NextPC isn't being squashed, so we should pick back up at
- //NextNPC.
- if (squashDelaySlot) {
- toCommit->nextPC[tid] = inst->readNextPC();
- toCommit->nextNPC[tid] = inst->readNextNPC();
- } else {
- toCommit->nextPC[tid] = inst->readNextNPC();
- toCommit->nextNPC[tid] = inst->readNextNPC() + instSize;
- }
#else
toCommit->branchTaken[tid] = inst->readNextPC() !=
(inst->readPC() + sizeof(TheISA::MachInst));
- toCommit->nextPC[tid] = inst->readNextPC();
- toCommit->nextNPC[tid] = inst->readNextPC() + instSize;
#endif
+ toCommit->nextPC[tid] = inst->readNextPC();
+ toCommit->nextNPC[tid] = inst->readNextNPC();
+ toCommit->nextMicroPC[tid] = inst->readNextMicroPC();
toCommit->includeSquashInst[tid] = false;
@@ -510,11 +471,7 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readNextPC();
-#if ISA_HAS_DELAY_SLOT
toCommit->nextNPC[tid] = inst->readNextNPC();
-#else
- toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst);
-#endif
toCommit->branchMispredict[tid] = false;
toCommit->includeSquashInst[tid] = false;
@@ -532,11 +489,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readPC();
-#if ISA_HAS_DELAY_SLOT
toCommit->nextNPC[tid] = inst->readNextPC();
-#else
- toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst);
-#endif
toCommit->branchMispredict[tid] = false;
// Must include the broadcasted SN in the squash.
@@ -880,11 +833,9 @@ DefaultIEW<Impl>::sortInsts()
{
int insts_from_rename = fromRename->size;
#ifdef DEBUG
-#if !ISA_HAS_DELAY_SLOT
for (int i = 0; i < numThreads; i++)
assert(insts[i].empty());
#endif
-#endif
for (int i = 0; i < insts_from_rename; ++i) {
insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
}
@@ -894,21 +845,9 @@ template <class Impl>
void
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
{
- DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until "
- "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]);
+ DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions\n", tid);
while (!insts[tid].empty()) {
-#if ISA_HAS_DELAY_SLOT
- if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) {
- DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction"
- " that occurs at or before delay slot [sn:%i].\n",
- tid, bdelayDoneSeqNum[tid]);
- break;
- } else {
- DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction "
- "[sn:%i].\n", tid, insts[tid].front()->seqNum);
- }
-#endif
if (insts[tid].front()->isLoad() ||
insts[tid].front()->isStore() ) {
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 10c3287f2..bdf5f07aa 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1005,11 +1005,7 @@ InstructionQueue<Impl>::squash(unsigned tid)
// Read instruction sequence number of last instruction out of the
// time buffer.
-#if ISA_HAS_DELAY_SLOT
- squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-#else
squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
-#endif
// Call doSquash if there are insts in the IQ
if (count[tid] > 0) {
diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh
index 366b4bb23..cf78c0941 100755
--- a/src/cpu/o3/mips/dyn_inst.hh
+++ b/src/cpu/o3/mips/dyn_inst.hh
@@ -70,9 +70,15 @@ class MipsDynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
+ MipsDynInst(StaticInstPtr staticInst,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu);
+
+ /** BaseDynInst constructor given a binary instruction. */
MipsDynInst(ExtMachInst inst,
- Addr PC, Addr NPC,
- Addr Pred_PC, Addr Pred_NPC,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh
index c0f9ae771..7e8697b32 100755
--- a/src/cpu/o3/mips/dyn_inst_impl.hh
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@@ -31,11 +31,23 @@
#include "cpu/o3/mips/dyn_inst.hh"
template <class Impl>
+MipsDynInst<Impl>::MipsDynInst(StaticInstPtr staticInst,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu)
+ : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
+ Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
+{
+ initVars();
+}
+
+template <class Impl>
MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
- Addr PC, Addr NPC,
- Addr Pred_PC, Addr Pred_NPC,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, microPC,
+ Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
{
initVars();
}
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index b5b1cd021..75d3fa6eb 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -179,7 +179,9 @@ class PhysRegFile
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
int(reg_idx), (uint64_t)val);
+#if THE_ISA == ALPHA_ISA
if (reg_idx != TheISA::ZeroReg)
+#endif
floatRegFile[reg_idx].d = val;
}
@@ -194,7 +196,9 @@ class PhysRegFile
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
int(reg_idx), (uint64_t)val);
+#if THE_ISA == ALPHA_ISA
if (reg_idx != TheISA::ZeroReg)
+#endif
floatRegFile[reg_idx].d = val;
}
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 431705e19..d78de2c87 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -356,47 +356,12 @@ DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
}
// Clear the instruction list and skid buffer in case they have any
- // insts in them. Since we support multiple ISAs, we cant just:
- // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is
- // a possible delay slot inst for different architectures
- // insts[tid].clear();
-#if ISA_HAS_DELAY_SLOT
- DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until "
- "[sn:%i].\n",tid, squash_seq_num);
- ListIt ilist_it = insts[tid].begin();
- while (ilist_it != insts[tid].end()) {
- if ((*ilist_it)->seqNum > squash_seq_num) {
- (*ilist_it)->setSquashed();
- DPRINTF(Rename, "Squashing incoming decode instruction, "
- "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC);
- }
- ilist_it++;
- }
-#else
+ // insts in them.
insts[tid].clear();
-#endif
// Clear the skid buffer in case it has any data in it.
- // See comments above.
- // skidBuffer[tid].clear();
-#if ISA_HAS_DELAY_SLOT
- DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions "
- "until [sn:%i].\n", tid, squash_seq_num);
- ListIt slist_it = skidBuffer[tid].begin();
- while (slist_it != skidBuffer[tid].end()) {
- if ((*slist_it)->seqNum > squash_seq_num) {
- (*slist_it)->setSquashed();
- DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]"
- "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC);
- }
- slist_it++;
- }
- resumeUnblocking = (skidBuffer[tid].size() != 0);
- DPRINTF(Rename, "Resume unblocking set to %s\n",
- resumeUnblocking ? "true" : "false");
-#else
skidBuffer[tid].clear();
-#endif
+
doSquash(squash_seq_num, tid);
}
@@ -776,11 +741,9 @@ DefaultRename<Impl>::sortInsts()
{
int insts_from_decode = fromDecode->size;
#ifdef DEBUG
-#if !ISA_HAS_DELAY_SLOT
for (int i=0; i < numThreads; i++)
assert(insts[i].empty());
#endif
-#endif
for (int i = 0; i < insts_from_decode; ++i) {
DynInstPtr inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);
@@ -1000,6 +963,7 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
// Floating point and Miscellaneous registers need their indexes
// adjusted to account for the expanded number of flattened int regs.
flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
+ DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg);
}
inst->flattenSrcReg(src_idx, flat_src_reg);
@@ -1016,9 +980,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
// See if the register is ready or not.
if (scoreboard->getReg(renamed_reg) == true) {
- DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
+ DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg);
inst->markSrcRegReady(src_idx);
+ } else {
+ DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg);
}
++renameRenameLookups;
@@ -1045,6 +1011,7 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
// Floating point and Miscellaneous registers need their indexes
// adjusted to account for the expanded number of flattened int regs.
flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
+ DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg);
}
inst->flattenDestReg(dest_idx, flat_dest_reg);
@@ -1248,13 +1215,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
"commit.\n", tid);
-#if ISA_HAS_DELAY_SLOT
- InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
-#else
- InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
-#endif
-
- squash(squashed_seq_num, tid);
+ squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
return true;
}
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index b436ec1c3..e6649ce3e 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -165,17 +165,21 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// If it's not referencing the zero register, then rename the
// register.
+#if THE_ISA == ALPHA_ISA
if (arch_reg != floatZeroReg) {
+#endif
renamed_reg = freeList->getFloatReg();
floatRenameMap[arch_reg].physical_reg = renamed_reg;
assert(renamed_reg < numPhysicalRegs &&
renamed_reg >= numPhysicalIntRegs);
+#if THE_ISA == ALPHA_ISA
} else {
// Otherwise return the zero register so nothing bad happens.
renamed_reg = floatZeroReg;
}
+#endif
} else {
// Subtract off the base offset for miscellaneous registers.
arch_reg = arch_reg - numLogicalRegs;
diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc
index 1859b35a4..e7f8b7949 100644
--- a/src/cpu/o3/scoreboard.cc
+++ b/src/cpu/o3/scoreboard.cc
@@ -29,6 +29,7 @@
* Kevin Lim
*/
+#include "arch/isa_specific.hh"
#include "cpu/o3/scoreboard.hh"
Scoreboard::Scoreboard(unsigned activeThreads,
@@ -79,11 +80,18 @@ Scoreboard::name() const
bool
Scoreboard::getReg(PhysRegIndex phys_reg)
{
+#if THE_ISA == ALPHA_ISA
// Always ready if int or fp zero reg.
if (phys_reg == zeroRegIdx ||
phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
return 1;
}
+#else
+ // Always ready if int zero reg.
+ if (phys_reg == zeroRegIdx) {
+ return 1;
+ }
+#endif
return regScoreBoard[phys_reg];
}
@@ -99,11 +107,18 @@ Scoreboard::setReg(PhysRegIndex phys_reg)
void
Scoreboard::unsetReg(PhysRegIndex ready_reg)
{
+#if THE_ISA == ALPHA_ISA
if (ready_reg == zeroRegIdx ||
ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
// Don't do anything if int or fp zero reg.
return;
}
+#else
+ if (ready_reg == zeroRegIdx) {
+ // Don't do anything if int zero reg.
+ return;
+ }
+#endif
regScoreBoard[ready_reg] = 0;
}
diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh
index 50d980f55..2e398577e 100644
--- a/src/cpu/o3/sparc/cpu_impl.hh
+++ b/src/cpu/o3/sparc/cpu_impl.hh
@@ -272,7 +272,10 @@ SparcO3CPU<Impl>::getSyscallArg(int i, int tid)
{
TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
SparcISA::ArgumentReg0 + i);
- return this->readArchIntReg(idx, tid);
+ TheISA::IntReg val = this->readArchIntReg(idx, tid);
+ if (bits(this->readMiscRegNoEffect(SparcISA::MISCREG_PSTATE, tid), 3, 3))
+ val = bits(val, 31, 0);
+ return val;
}
template <class Impl>
diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh
index 72242b161..a7ab6cd79 100644
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
- SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
- Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
+ SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu);
+
+ /** BaseDynInst constructor given a binary instruction. */
+ SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
SparcDynInst(StaticInstPtr &_staticInst);
diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh
index c4d30b6f4..6bfe97717 100644
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@@ -31,10 +31,23 @@
#include "cpu/o3/sparc/dyn_inst.hh"
template <class Impl>
+SparcDynInst<Impl>::SparcDynInst(StaticInstPtr staticInst,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
+ InstSeqNum seq_num, O3CPU *cpu)
+ : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
+ Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
+{
+ initVars();
+}
+
+template <class Impl>
SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
- Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+ Addr PC, Addr NPC, Addr microPC,
+ Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, microPC,
+ Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
{
initVars();
}