summaryrefslogtreecommitdiff
path: root/src/cpu/o3/fetch_impl.hh
diff options
context:
space:
mode:
authorKorey Sewell <ksewell@umich.edu>2006-07-23 13:39:42 -0400
committerKorey Sewell <ksewell@umich.edu>2006-07-23 13:39:42 -0400
commit19ca97af79f3a40111991b4f8375592c7ede65fa (patch)
treefd37f5961f29cd5d4e5fc1eba1b37a1a2d17bc67 /src/cpu/o3/fetch_impl.hh
parent87d485945879bfbfcb798e70d564b02e77fbc250 (diff)
downloadgem5-19ca97af79f3a40111991b4f8375592c7ede65fa.tar.xz
This changeset gets the MIPS ISA pretty much working in the O3CPU. It builds, runs, and gets very very close to completing the hello world
succesfully but there are some minor quirks to iron out. Who would've known a DELAY SLOT introduces that much complexity?! arrgh! Anyways, a lot of this stuff had to do with my project at MIPS and me needing to know how I was going to get this working for the MIPS ISA. So I figured I would try to touch it up and throw it in here (I hate to introduce non-completely working components... ) src/arch/alpha/isa/mem.isa: spacing src/arch/mips/faults.cc: src/arch/mips/faults.hh: Gabe really authored this src/arch/mips/isa/decoder.isa: add StoreConditional Flag to instruction src/arch/mips/isa/formats/basic.isa: Steven really did this file src/arch/mips/isa/formats/branch.isa: fix bug for uncond/cond control src/arch/mips/isa/formats/mem.isa: Adjust O3CPU memory access to use new memory model interface. src/arch/mips/isa/formats/util.isa: update LoadStoreBase template src/arch/mips/isa_traits.cc: update SERIALIZE partially src/arch/mips/process.cc: src/arch/mips/process.hh: no need for this for NOW. ASID/Virtual addressing handles it src/arch/mips/regfile/misc_regfile.hh: add in clear() function and comments for future usage of special misc. regs src/cpu/base_dyn_inst.hh: add in nextNPC variable and supporting functions. add isCondDelaySlot function Update predTaken and mispredicted functions src/cpu/base_dyn_inst_impl.hh: init nextNPC src/cpu/o3/SConscript: add MIPS files to compile src/cpu/o3/alpha/thread_context.hh: no need for my name on this file src/cpu/o3/bpred_unit_impl.hh: Update RAS appropriately for MIPS src/cpu/o3/comm.hh: add some extra communication variables to aid in handling the delay slots src/cpu/o3/commit.hh: minor name fix for nextNPC functions. src/cpu/o3/commit_impl.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/rename_impl.hh: Fix necessary variables and functions for squashes with delay slots src/cpu/o3/cpu.cc: Update function interface ... adjust removeInstsNotInROB function to recognize delay slots insts src/cpu/o3/cpu.hh: update removeInstsNotInROB src/cpu/o3/decode.hh: declare necessary variables for handling delay slot src/cpu/o3/dyn_inst.hh: Add in MipsDynInst src/cpu/o3/fetch.hh: src/cpu/o3/iew.hh: src/cpu/o3/rename.hh: declare necessary variables and adjust functions for handling delay slot src/cpu/o3/inst_queue.hh: src/cpu/simple/base.cc: no need for my name here src/cpu/o3/isa_specific.hh: add in MIPS files src/cpu/o3/scoreboard.hh: dont include alpha specific isa traits! src/cpu/o3/thread_context.hh: no need for my name here, i just rearranged where the file goes src/cpu/static_inst.hh: add isCondDelaySlot function src/cpu/o3/mips/cpu.cc: src/cpu/o3/mips/cpu.hh: src/cpu/o3/mips/cpu_builder.cc: src/cpu/o3/mips/cpu_impl.hh: src/cpu/o3/mips/dyn_inst.cc: src/cpu/o3/mips/dyn_inst.hh: src/cpu/o3/mips/dyn_inst_impl.hh: src/cpu/o3/mips/impl.hh: src/cpu/o3/mips/params.hh: src/cpu/o3/mips/thread_context.cc: src/cpu/o3/mips/thread_context.hh: MIPS file for O3CPU...mirrors ALPHA definition --HG-- extra : convert_revision : 9bb199b4085903e49ffd5a4c8ac44d11460d988c
Diffstat (limited to 'src/cpu/o3/fetch_impl.hh')
-rw-r--r--src/cpu/o3/fetch_impl.hh174
1 files changed, 155 insertions, 19 deletions
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 4184e1867..54e35433b 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -165,10 +165,15 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
cacheDataPC[tid] = 0;
cacheDataValid[tid] = false;
- stalls[tid].decode = 0;
- stalls[tid].rename = 0;
- stalls[tid].iew = 0;
- stalls[tid].commit = 0;
+ delaySlotInfo[tid].branchSeqNum = -1;
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+
+ stalls[tid].decode = false;
+ stalls[tid].rename = false;
+ stalls[tid].iew = false;
+ stalls[tid].commit = false;
}
// Get the size of an instruction.
@@ -288,6 +293,9 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
}
#endif
+ // Schedule fetch to get the correct PC from the CPU
+ // scheduleFetchStartupEvent(1);
+
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
@@ -426,6 +434,10 @@ DefaultFetch<Impl>::takeOverFrom()
nextPC[i] = cpu->readNextPC(i);
#if THE_ISA != ALPHA_ISA
nextNPC[i] = cpu->readNextNPC(i);
+ delaySlotInfo[i].branchSeqNum = -1;
+ delaySlotInfo[i].numInsts = 0;
+ delaySlotInfo[i].targetAddr = 0;
+ delaySlotInfo[i].targetReady = false;
#endif
fetchStatus[i] = Running;
}
@@ -474,7 +486,8 @@ DefaultFetch<Impl>::switchToInactive()
template <class Impl>
bool
-DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
+DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
+ Addr &next_NPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
@@ -482,12 +495,54 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
bool predict_taken;
if (!inst->isControl()) {
+#if THE_ISA == ALPHA_ISA
next_PC = next_PC + instSize;
inst->setPredTarg(next_PC);
+#else
+ Addr cur_PC = next_PC;
+ next_PC = cur_PC + instSize; //next_NPC;
+ next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
+ inst->setPredTarg(next_NPC);
+#endif
return false;
}
- predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
+ int tid = inst->threadNumber;
+#if THE_ISA == ALPHA_ISA
+ predict_taken = branchPred.predict(inst, next_PC, tid);
+#else
+ Addr pred_PC = next_PC;
+ predict_taken = branchPred.predict(inst, pred_PC, tid);
+
+ if (predict_taken) {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+ } else {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+ }
+
+ if (predict_taken) {
+ next_PC = next_NPC;
+ next_NPC = pred_PC;
+
+ // Update delay slot info
+ ++delaySlotInfo[tid].numInsts;
+ delaySlotInfo[tid].targetAddr = pred_PC;
+ DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
+ delaySlotInfo[tid].numInsts);
+ } else { // !predict_taken
+ if (inst->isCondDelaySlot()) {
+ next_PC = pred_PC;
+ // The delay slot is skipped here if there is on
+ // prediction
+ } else {
+ next_PC = next_NPC;
+ // No need to declare a delay slot here since
+ // there is no for the pred. target to jump
+ }
+
+ next_NPC = next_NPC + instSize;
+ }
+#endif
++fetchedBranches;
@@ -606,6 +661,7 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
PC[tid] = new_PC;
nextPC[tid] = new_PC + instSize;
+ nextNPC[tid] = new_PC + (2 * instSize);
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -639,6 +695,14 @@ DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
doSquash(new_PC, tid);
+#if THE_ISA != ALPHA_ISA
+ if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+ }
+#endif
+
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
cpu->removeInstsUntil(seq_num, tid);
@@ -712,14 +776,26 @@ DefaultFetch<Impl>::updateFetchStatus()
template <class Impl>
void
-DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+ bool squash_delay_slot, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
doSquash(new_PC, tid);
+#if THE_ISA == ALPHA_ISA
// Tell the CPU to remove any instructions that are not in the ROB.
- cpu->removeInstsNotInROB(tid);
+ cpu->removeInstsNotInROB(tid, true, 0);
+#else
+ if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+ }
+
+ // Tell the CPU to remove any instructions that are not in the ROB.
+ cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
+#endif
}
template <class Impl>
@@ -828,8 +904,16 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
"from commit.\n",tid);
+#if THE_ISA == ALPHA_ISA
+ InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
+#else
+ InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#endif
// In any case, squash.
- squash(fromCommit->commitInfo[tid].nextPC,tid);
+ squash(fromCommit->commitInfo[tid].nextPC,
+ doneSeqNum,
+ fromCommit->commitInfo[tid].squashDelaySlot,
+ tid);
// Also check if there's a mispredict that happened.
if (fromCommit->commitInfo[tid].branchMispredict) {
@@ -876,9 +960,15 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
}
if (fetchStatus[tid] != Squashing) {
+
+#if THE_ISA == ALPHA_ISA
+ InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
+#else
+ InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
+#endif
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
- fromDecode->decodeInfo[tid].doneSeqNum,
+ doneSeqNum,
tid);
return true;
@@ -984,6 +1074,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
}
Addr next_PC = fetch_PC;
+ Addr next_NPC = next_PC + instSize;
InstSeqNum inst_seq;
MachInst inst;
ExtMachInst ext_inst;
@@ -1002,10 +1093,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// ended this fetch block.
bool predicted_branch = false;
+ // Need to keep track of whether or not a delay slot
+ // instruction has been fetched
+
for (;
offset < cacheBlkSize &&
numInst < fetchWidth &&
- !predicted_branch;
+ (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
++numInst) {
// Get a sequence number.
@@ -1042,7 +1136,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
instruction->staticInst,
instruction->readPC(),tid);
- predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
+ predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
+ next_NPC);
// Add instruction to the CPU's list of instructions.
instruction->setInstListIt(cpu->addInst(instruction));
@@ -1068,7 +1163,41 @@ DefaultFetch<Impl>::fetch(bool &status_change)
break;
}
- offset+= instSize;
+ offset += instSize;
+
+#if THE_ISA != ALPHA_ISA
+ if (predicted_branch) {
+ delaySlotInfo[tid].branchSeqNum = inst_seq;
+
+ DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
+ tid, inst_seq);
+ continue;
+ } else if (delaySlotInfo[tid].numInsts > 0) {
+ --delaySlotInfo[tid].numInsts;
+
+ // It's OK to set PC to target of branch
+ if (delaySlotInfo[tid].numInsts == 0) {
+ delaySlotInfo[tid].targetReady = true;
+
+ // Break the looping condition
+ predicted_branch = true;
+ }
+
+ DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
+ " process.\n", tid, delaySlotInfo[tid].numInsts);
+ }
+#endif
+ }
+
+ if (offset >= cacheBlkSize) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+ "block.\n", tid);
+ } else if (numInst >= fetchWidth) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
+ "for this cycle.\n", tid);
+ } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+ "instruction encountered.\n", tid);
}
}
@@ -1085,12 +1214,19 @@ DefaultFetch<Impl>::fetch(bool &status_change)
PC[tid] = next_PC;
nextPC[tid] = next_PC + instSize;
#else
- PC[tid] = next_PC;
- nextPC[tid] = next_PC + instSize;
- nextPC[tid] = next_PC + instSize;
-
- thread->setNextPC(thread->readNextNPC());
- thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+ if (delaySlotInfo[tid].targetReady &&
+ delaySlotInfo[tid].numInsts == 0) {
+ // Set PC to target
+ PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
+ nextPC[tid] = next_PC + instSize; //next_NPC
+ nextNPC[tid] = next_PC + (2 * instSize);
+
+ delaySlotInfo[tid].targetReady = false;
+ } else {
+ PC[tid] = next_PC;
+ nextPC[tid] = next_NPC;
+ nextNPC[tid] = next_NPC + instSize;
+ }
#endif
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB