summaryrefslogtreecommitdiff
path: root/cpu/beta_cpu
diff options
context:
space:
mode:
Diffstat (limited to 'cpu/beta_cpu')
-rw-r--r--cpu/beta_cpu/2bit_local_pred.cc110
-rw-r--r--cpu/beta_cpu/2bit_local_pred.hh99
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst.cc105
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst.hh67
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst_impl.hh109
-rw-r--r--cpu/beta_cpu/alpha_full_cpu.cc918
-rw-r--r--cpu/beta_cpu/alpha_full_cpu.hh13
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_builder.cc306
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_impl.hh690
-rw-r--r--cpu/beta_cpu/alpha_impl.hh48
-rw-r--r--cpu/beta_cpu/alpha_params.hh49
-rw-r--r--cpu/beta_cpu/bpred_unit.cc5
-rw-r--r--cpu/beta_cpu/bpred_unit.hh51
-rw-r--r--cpu/beta_cpu/bpred_unit_impl.hh13
-rw-r--r--cpu/beta_cpu/btb.cc85
-rw-r--r--cpu/beta_cpu/btb.hh52
-rw-r--r--cpu/beta_cpu/comm.hh65
-rw-r--r--cpu/beta_cpu/commit.hh28
-rw-r--r--cpu/beta_cpu/commit_impl.hh120
-rw-r--r--cpu/beta_cpu/cpu_policy.hh38
-rw-r--r--cpu/beta_cpu/decode.hh24
-rw-r--r--cpu/beta_cpu/decode_impl.hh79
-rw-r--r--cpu/beta_cpu/fetch.hh29
-rw-r--r--cpu/beta_cpu/fetch_impl.hh259
-rw-r--r--cpu/beta_cpu/free_list.cc23
-rw-r--r--cpu/beta_cpu/free_list.hh35
-rw-r--r--cpu/beta_cpu/full_cpu.cc85
-rw-r--r--cpu/beta_cpu/full_cpu.hh56
-rw-r--r--cpu/beta_cpu/iew.hh29
-rw-r--r--cpu/beta_cpu/iew_impl.hh156
-rw-r--r--cpu/beta_cpu/inst_queue.hh133
-rw-r--r--cpu/beta_cpu/inst_queue_impl.hh509
-rw-r--r--cpu/beta_cpu/mem_dep_unit.cc9
-rw-r--r--cpu/beta_cpu/mem_dep_unit.hh70
-rw-r--r--cpu/beta_cpu/mem_dep_unit_impl.hh166
-rw-r--r--cpu/beta_cpu/regfile.hh42
-rw-r--r--cpu/beta_cpu/rename.hh45
-rw-r--r--cpu/beta_cpu/rename_impl.hh397
-rw-r--r--cpu/beta_cpu/rename_map.cc76
-rw-r--r--cpu/beta_cpu/rename_map.hh15
-rw-r--r--cpu/beta_cpu/rob.hh43
-rw-r--r--cpu/beta_cpu/rob_impl.hh78
-rw-r--r--cpu/beta_cpu/store_set.cc192
-rw-r--r--cpu/beta_cpu/store_set.hh58
44 files changed, 3674 insertions, 1905 deletions
diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc
new file mode 100644
index 000000000..88c39a9b0
--- /dev/null
+++ b/cpu/beta_cpu/2bit_local_pred.cc
@@ -0,0 +1,110 @@
+#include <new>
+
+#include "base/trace.hh"
+#include "cpu/beta_cpu/2bit_local_pred.hh"
+
+/**
+ * Builds a counter that is `bits` wide and starts out at zero.
+ */
+DefaultBP::SatCounter::SatCounter(unsigned bits)
+{
+    // Largest value the counter may reach before it saturates.
+    maxVal = (1 << bits) - 1;
+    counter = 0;
+}
+
+/**
+ * Builds a counter that is `bits` wide and starts out at `initial_val`.
+ * Panics when the starting value is larger than the counter's maximum.
+ */
+DefaultBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val)
+{
+    maxVal = (1 << bits) - 1;
+    counter = initial_val;
+
+    // The starting value has to lie within the counter's range.
+    if (!(initial_val <= maxVal)) {
+        panic("BP: Initial counter value exceeds max size.");
+    }
+}
+
+/** Adds one to the counter unless it already sits at its maximum. */
+void
+DefaultBP::SatCounter::increment()
+{
+    // Saturated at the top: leave the value alone.
+    if (counter >= maxVal) {
+        return;
+    }
+
+    ++counter;
+}
+
+/** Subtracts one from the counter unless it is already zero. */
+void
+DefaultBP::SatCounter::decrement()
+{
+    // Saturated at the bottom: leave the value alone.
+    if (counter == 0) {
+        return;
+    }
+
+    --counter;
+}
+
+/**
+ * Builds the local predictor table.
+ * @param _localPredictorSize Number of counters in the table; must be a
+ *        non-zero power of two because the index is formed with a mask.
+ * @param _localCtrBits Width in bits of each saturating counter (1 to 8).
+ * @param _instShiftAmt Number of bits the PC is shifted right before it is
+ *        masked into an index.
+ */
+DefaultBP::DefaultBP(unsigned _localPredictorSize,
+                     unsigned _localCtrBits,
+                     unsigned _instShiftAmt)
+    : localPredictorSize(_localPredictorSize),
+      localCtrBits(_localCtrBits),
+      instShiftAmt(_instShiftAmt)
+{
+    // indexMask is (size - 1), which is only a proper index mask for a
+    // non-zero power of two: a size of zero gives an all-ones mask and
+    // out-of-bounds indexing, any other size leaves entries unreachable.
+    if (localPredictorSize == 0 ||
+        (localPredictorSize & (localPredictorSize - 1)) != 0) {
+        panic("BP: Local predictor size must be a non-zero power of 2.");
+    }
+
+    // Counter values are held in a uint8_t and the prediction is read from
+    // bit (localCtrBits - 1), so only widths of 1 through 8 are usable.
+    if (localCtrBits == 0 || localCtrBits > 8) {
+        panic("BP: Local counter bits must be between 1 and 8.");
+    }
+
+    // Setup the index mask.
+    indexMask = localPredictorSize - 1;
+
+    DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask);
+
+    // Setup the array of counters for the local predictor.  SatCounter has
+    // no default constructor and "new T[n](args)" is not ISO C++, so obtain
+    // raw storage and construct every counter in place.  Storage obtained
+    // this way has to be released with ::operator delete[], not delete[].
+    localCtrs = static_cast<SatCounter *>(
+        ::operator new[](sizeof(SatCounter) * localPredictorSize));
+
+    for (unsigned i = 0; i < localPredictorSize; ++i) {
+        new (&localCtrs[i]) SatCounter(localCtrBits);
+    }
+
+    DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n",
+            localPredictorSize);
+
+    DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits);
+
+    DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n",
+            instShiftAmt);
+}
+
+/**
+ * Converts a raw counter value into a prediction.
+ * @param count Counter value read from the predictor table.
+ * @return True when the value shifted right by (localCtrBits - 1) is
+ *         non-zero.
+ */
+inline
+bool
+DefaultBP::getPrediction(uint8_t &count)
+{
+    // Position of the most significant bit of a localCtrBits-wide counter.
+    const unsigned msb_pos = localCtrBits - 1;
+
+    return (count >> msb_pos) != 0;
+}
+
+/**
+ * Computes the predictor table index for a branch address: the address is
+ * shifted right by instShiftAmt and then masked with indexMask.
+ */
+inline
+unsigned
+DefaultBP::getLocalIndex(Addr &branch_addr)
+{
+    unsigned table_idx = (branch_addr >> instShiftAmt) & indexMask;
+
+    return table_idx;
+}
+
+/**
+ * Reads the counter selected by the branch address and turns it into a
+ * taken/not-taken prediction.  The table itself is not modified.
+ */
+bool
+DefaultBP::lookup(Addr &branch_addr)
+{
+    const unsigned idx = getLocalIndex(branch_addr);
+
+    DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
+            idx);
+
+    // getPrediction() takes a non-const reference, so keep a local copy.
+    uint8_t ctr_val = localCtrs[idx].read();
+
+    DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
+            (int)ctr_val);
+
+    return getPrediction(ctr_val);
+}
+
+/**
+ * Trains the counter selected by the branch address: it is incremented
+ * for a taken branch and decremented for a not-taken one.
+ */
+void
+DefaultBP::update(Addr &branch_addr, bool taken)
+{
+    // Update the local predictor.
+    const unsigned idx = getLocalIndex(branch_addr);
+
+    DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
+            idx);
+
+    SatCounter &ctr = localCtrs[idx];
+
+    if (!taken) {
+        DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
+        ctr.decrement();
+        return;
+    }
+
+    DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
+    ctr.increment();
+}
diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh
new file mode 100644
index 000000000..32a7972d0
--- /dev/null
+++ b/cpu/beta_cpu/2bit_local_pred.hh
@@ -0,0 +1,99 @@
+#ifndef __2BIT_LOCAL_PRED_HH__
+#define __2BIT_LOCAL_PRED_HH__
+
+// For Addr type.
+#include "arch/alpha/isa_traits.hh"
+
+/**
+ * Local branch predictor built from a table of n-bit saturating counters
+ * that is indexed by bits of the branch address.
+ *
+ * NOTE(review): the class holds its counter table through a raw pointer and
+ * declares no destructor or copy operations, so the table is never released
+ * by the class itself and copies of a DefaultBP would share one table.
+ */
+class DefaultBP
+{
+  public:
+    /**
+     * Default branch predictor constructor.
+     * @param localPredictorSize Number of counters in the local predictor.
+     * @param localCtrBits Number of bits in each counter.
+     * @param instShiftAmt Number of bits to shift the PC when indexing.
+     */
+    DefaultBP(unsigned localPredictorSize, unsigned localCtrBits,
+              unsigned instShiftAmt);
+
+    /**
+     * Looks up the given address in the branch predictor and returns
+     * a true/false value as to whether it is taken.
+     * @param branch_addr The address of the branch to look up.
+     * @return Whether or not the branch is taken.
+     */
+    bool lookup(Addr &branch_addr);
+
+    /**
+     * Updates the branch predictor with the actual result of a branch.
+     * @param branch_addr The address of the branch to update.
+     * @param taken Whether or not the branch was taken.
+     */
+    void update(Addr &branch_addr, bool taken);
+
+  private:
+
+    /**
+     * Turns a raw counter value into a taken/not-taken prediction.
+     * @param count The counter value to interpret.
+     * @return Whether the counter value predicts taken.
+     */
+    inline bool getPrediction(uint8_t &count);
+
+    /**
+     * Calculates the local predictor index for an address.
+     * @param PC The address to compute the index from.
+     * @return Index into the array of local counters.
+     */
+    inline unsigned getLocalIndex(Addr &PC);
+
+    /**
+     * Private counter class for the internal saturating counters.
+     * Implements an n bit saturating counter and provides methods to
+     * increment, decrement, and read it.
+     * @todo Consider making this something that more closely mimics a
+     * built in class so you can use ++ or --.
+     */
+    class SatCounter
+    {
+      public:
+        /**
+         * Constructor for the counter.  Explicit so that a plain integer
+         * is never silently converted into a counter.
+         * @param bits How many bits the counter will have.
+         */
+        explicit SatCounter(unsigned bits);
+
+        /**
+         * Constructor for the counter.
+         * @param bits How many bits the counter will have.
+         * @param initial_val Starting value for each counter.
+         */
+        SatCounter(unsigned bits, unsigned initial_val);
+
+        /**
+         * Increments the counter's current value.
+         */
+        void increment();
+
+        /**
+         * Decrements the counter's current value.
+         */
+        void decrement();
+
+        /**
+         * Read the counter's value.
+         */
+        uint8_t read() const
+        {
+            return counter;
+        }
+
+      private:
+        /** Largest value the counter is allowed to reach. */
+        uint8_t maxVal;
+
+        /** Current value of the counter. */
+        uint8_t counter;
+    };
+
+    /** Array of counters that make up the local predictor. */
+    SatCounter *localCtrs;
+
+    /** Size of the local predictor. */
+    unsigned localPredictorSize;
+
+    /** Number of bits of the local predictor's counters. */
+    unsigned localCtrBits;
+
+    /** Number of bits to shift the PC when calculating index. */
+    unsigned instShiftAmt;
+
+    /** Mask to get index bits. */
+    unsigned indexMask;
+};
+
+#endif // __2BIT_LOCAL_PRED_HH__
diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc
index a79d3082c..1bfcb8420 100644
--- a/cpu/beta_cpu/alpha_dyn_inst.cc
+++ b/cpu/beta_cpu/alpha_dyn_inst.cc
@@ -1,102 +1,7 @@
-#ifndef __ALPHA_DYN_INST_CC__
-#define __ALPHA_DYN_INST_CC__
-#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+#include "cpu/beta_cpu/alpha_dyn_inst_impl.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
-// Force instantiation of BaseDynInst
-template BaseDynInst<AlphaSimpleImpl>;
-
-AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC,
- InstSeqNum seq_num, FullCPU *cpu)
- : BaseDynInst<AlphaSimpleImpl>(inst, PC, Pred_PC, seq_num, cpu)
-{
- // Initialize these to illegal values.
- robIdx = -1;
- iqIdx = -1;
-}
-
-AlphaDynInst::AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst)
- : BaseDynInst<AlphaSimpleImpl>(_staticInst)
-{
-}
-
-uint64_t
-AlphaDynInst::readUniq()
-{
- return cpu->readUniq();
-}
-
-void
-AlphaDynInst::setUniq(uint64_t val)
-{
- cpu->setUniq(val);
-}
-
-uint64_t
-AlphaDynInst::readFpcr()
-{
- return cpu->readFpcr();
-}
-
-void
-AlphaDynInst::setFpcr(uint64_t val)
-{
- cpu->setFpcr(val);
-}
-
-#ifdef FULL_SYSTEM
-uint64_t
-AlphaDynInst::readIpr(int idx, Fault &fault)
-{
- return cpu->readIpr(idx, fault);
-}
-Fault
-AlphaDynInst::setIpr(int idx, uint64_t val)
-{
- return cpu->setIpr(idx, val);
-}
-
-Fault
-AlphaDynInst::hwrei()
-{
- return cpu->hwrei();
-}
-
-int
-AlphaDynInst::readIntrFlag()
-{
-return cpu->readIntrFlag();
-}
-
-void
-AlphaDynInst::setIntrFlag(int val)
-{
- cpu->setIntrFlag(val);
-}
-
-bool
-AlphaDynInst::inPalMode()
-{
- return cpu->inPalMode();
-}
-
-void
-AlphaDynInst::trap(Fault fault)
-{
- cpu->trap(fault);
-}
-
-bool
-AlphaDynInst::simPalCheck(int palFunc)
-{
- return cpu->simPalCheck(palFunc);
-}
-#else
-void
-AlphaDynInst::syscall()
-{
- cpu->syscall();
-}
-#endif
-
-#endif // __ALPHA_DYN_INST_CC__
+// Force instantiation of AlphaDynInst for all the implementations that
+// are needed.  An explicit instantiation of a class template has to name
+// the class key.
+template class AlphaDynInst<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh
index 69d145355..4e1cebd11 100644
--- a/cpu/beta_cpu/alpha_dyn_inst.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst.hh
@@ -8,11 +8,38 @@
#include "cpu/beta_cpu/alpha_impl.hh"
#include "cpu/inst_seq.hh"
-using namespace std;
-
-class AlphaDynInst : public BaseDynInst<AlphaSimpleImpl>
+/**
+ * Mostly implementation specific AlphaDynInst. It is templated in case there
+ * are other implementations that are similar enough to be able to use this
+ * class without changes. This is mainly useful if there are multiple similar
+ * CPU implementations of the same ISA.
+ */
+
+template <class Impl>
+class AlphaDynInst : public BaseDynInst<Impl>
{
public:
+ // Typedef for the CPU.
+ typedef typename Impl::FullCPU FullCPU;
+
+ //Typedef to get the ISA.
+ typedef typename Impl::ISA ISA;
+
+ /// Binary machine instruction type.
+ typedef typename ISA::MachInst MachInst;
+ /// Memory address type.
+ typedef typename ISA::Addr Addr;
+ /// Logical register index type.
+ typedef typename ISA::RegIndex RegIndex;
+ /// Integer register index type.
+ typedef typename ISA::IntReg IntReg;
+
+ enum {
+ MaxInstSrcRegs = ISA::MaxInstSrcRegs, ///< Max source regs
+ MaxInstDestRegs = ISA::MaxInstDestRegs, ///< Max dest regs
+ };
+
+ public:
/** BaseDynInst constructor given a binary instruction. */
AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
FullCPU *cpu);
@@ -27,40 +54,6 @@ class AlphaDynInst : public BaseDynInst<AlphaSimpleImpl>
return fault;
}
- /** Location of this instruction within the ROB. Might be somewhat
- * implementation specific.
- * Might not want this data in the inst as it may be deleted prior to
- * execution of the stage that needs it.
- */
- int robIdx;
-
- int getROBEntry()
- {
- return robIdx;
- }
-
- void setROBEntry(int rob_idx)
- {
- robIdx = rob_idx;
- }
-
- /** Location of this instruction within the IQ. Might be somewhat
- * implementation specific.
- * Might not want this data in the inst as it may be deleted prior to
- * execution of the stage that needs it.
- */
- int iqIdx;
-
- int getIQEntry()
- {
- return iqIdx;
- }
-
- void setIQEntry(int iq_idx)
- {
- iqIdx = iq_idx;
- }
-
uint64_t readUniq();
void setUniq(uint64_t val);
diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
new file mode 100644
index 000000000..8311067db
--- /dev/null
+++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
@@ -0,0 +1,109 @@
+
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+
+/**
+ * Builds a dynamic instruction from a binary machine instruction.  All
+ * arguments are handed on to the base class constructor.
+ */
+template <class Impl>
+AlphaDynInst<Impl>::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC,
+                                 InstSeqNum seq_num, FullCPU *cpu)
+    // The base class of AlphaDynInst<Impl> is BaseDynInst<Impl>; naming a
+    // fixed implementation here only compiles for that one Impl.
+    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+{
+}
+
+/**
+ * Builds a dynamic instruction from a static instruction, which is handed
+ * on to the base class constructor.
+ *
+ * NOTE(review): the parameter type still names AlphaISA rather than the
+ * ISA of Impl; it is kept as is because it has to match the in-class
+ * declaration, which is not shown here.
+ */
+template <class Impl>
+AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst)
+    // Initialize the actual base class, BaseDynInst<Impl>.
+    : BaseDynInst<Impl>(_staticInst)
+{
+}
+
+/** Forwards to the CPU's readUniq() and returns its result. */
+template <class Impl>
+uint64_t
+AlphaDynInst<Impl>::readUniq()
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->readUniq();
+}
+
+/** Forwards the given value to the CPU's setUniq(). */
+template <class Impl>
+void
+AlphaDynInst<Impl>::setUniq(uint64_t val)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    this->cpu->setUniq(val);
+}
+
+/** Forwards to the CPU's readFpcr() and returns its result. */
+template <class Impl>
+uint64_t
+AlphaDynInst<Impl>::readFpcr()
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->readFpcr();
+}
+
+/** Forwards the given value to the CPU's setFpcr(). */
+template <class Impl>
+void
+AlphaDynInst<Impl>::setFpcr(uint64_t val)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    this->cpu->setFpcr(val);
+}
+
+#ifdef FULL_SYSTEM
+/**
+ * Forwards to the CPU's readIpr().
+ * @param idx Index handed to the CPU.
+ * @param fault Fault reference handed to the CPU.
+ * @return The value returned by the CPU.
+ */
+template <class Impl>
+uint64_t
+AlphaDynInst<Impl>::readIpr(int idx, Fault &fault)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->readIpr(idx, fault);
+}
+
+/**
+ * Forwards to the CPU's setIpr().
+ * @param idx Index handed to the CPU.
+ * @param val Value handed to the CPU.
+ * @return The fault returned by the CPU.
+ */
+template <class Impl>
+Fault
+AlphaDynInst<Impl>::setIpr(int idx, uint64_t val)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->setIpr(idx, val);
+}
+
+/** Forwards to the CPU's hwrei() and returns the fault it reports. */
+template <class Impl>
+Fault
+AlphaDynInst<Impl>::hwrei()
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->hwrei();
+}
+
+/** Forwards to the CPU's readIntrFlag() and returns its result. */
+template <class Impl>
+int
+AlphaDynInst<Impl>::readIntrFlag()
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->readIntrFlag();
+}
+
+/** Forwards the given value to the CPU's setIntrFlag(). */
+template <class Impl>
+void
+AlphaDynInst<Impl>::setIntrFlag(int val)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    this->cpu->setIntrFlag(val);
+}
+
+/** Forwards to the CPU's inPalMode() and returns its result. */
+template <class Impl>
+bool
+AlphaDynInst<Impl>::inPalMode()
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->inPalMode();
+}
+
+/** Forwards the given fault to the CPU's trap(). */
+template <class Impl>
+void
+AlphaDynInst<Impl>::trap(Fault fault)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    this->cpu->trap(fault);
+}
+
+/**
+ * Forwards to the CPU's simPalCheck().
+ * @param palFunc Value handed to the CPU.
+ * @return The result returned by the CPU.
+ */
+template <class Impl>
+bool
+AlphaDynInst<Impl>::simPalCheck(int palFunc)
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    return this->cpu->simPalCheck(palFunc);
+}
+#else
+/** Forwards to the CPU's syscall(). */
+template <class Impl>
+void
+AlphaDynInst<Impl>::syscall()
+{
+    // this-> makes the lookup of cpu dependent; an unqualified name is not
+    // searched for in a base class that depends on Impl.
+    this->cpu->syscall();
+}
+#endif
+
diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc
index 880418146..80c4bdec8 100644
--- a/cpu/beta_cpu/alpha_full_cpu.cc
+++ b/cpu/beta_cpu/alpha_full_cpu.cc
@@ -1,911 +1,9 @@
-#include "base/cprintf.hh"
-#include "base/statistics.hh"
-#include "base/timebuf.hh"
-#include "cpu/full_cpu/dd_queue.hh"
-#include "cpu/full_cpu/full_cpu.hh"
-#include "cpu/full_cpu/rob_station.hh"
-#include "mem/cache/cache.hh" // for dynamic cast
-#include "mem/mem_interface.hh"
-#include "sim/builder.hh"
-#include "sim/sim_events.hh"
-#include "sim/stats.hh"
-
-#include "cpu/beta_cpu/alpha_full_cpu.hh"
-#include "cpu/beta_cpu/alpha_params.hh"
-#include "cpu/beta_cpu/comm.hh"
-
-AlphaFullCPU::AlphaFullCPU(Params &params)
- : FullBetaCPU<AlphaSimpleImpl>(params)
-{
-
- fetch.setCPU(this);
- decode.setCPU(this);
- rename.setCPU(this);
- iew.setCPU(this);
- commit.setCPU(this);
-
- rob.setCPU(this);
-}
-
-#ifndef FULL_SYSTEM
-
-void
-AlphaFullCPU::syscall()
-{
- DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n");
-
- squashStages();
-
- // Copy over all important state to xc once all the unrolling is done.
- copyToXC();
-
- process->syscall(xc);
-
- // Copy over all important state back to normal.
- copyFromXC();
-}
-
-// This is not a pretty function, and should only be used if it is necessary
-// to fake having everything squash all at once (ie for non-full system
-// syscalls).
-void
-AlphaFullCPU::squashStages()
-{
- InstSeqNum rob_head = rob.readHeadSeqNum();
-
- // Now hack the time buffer to put this sequence number in the places
- // where the stages might read it.
- for (int i = 0; i < 10; ++i)
- {
- timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head;
- }
-
- fetch.squash(rob.readHeadNextPC());
- fetchQueue.advance();
-
- decode.squash();
- decodeQueue.advance();
-
- rename.squash();
- renameQueue.advance();
- renameQueue.advance();
-
- iew.squash();
- iewQueue.advance();
- iewQueue.advance();
-
- rob.squash(rob_head);
- commit.setSquashing();
-}
-
-#endif // FULL_SYSTEM
-
-void
-AlphaFullCPU::copyToXC()
-{
- PhysRegIndex renamed_reg;
-
- // First loop through the integer registers.
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i)
- {
- renamed_reg = renameMap.lookup(i);
- xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg];
- DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n",
- renamed_reg, regFile.intRegFile[renamed_reg]);
- }
-
- // Then loop through the floating point registers.
- for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
- {
- renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
- xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d;
- xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q;
- }
-
- xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr;
- xc->regs.miscRegs.uniq = regFile.miscRegs.uniq;
- xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag;
- xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr;
-
- xc->regs.pc = rob.readHeadPC();
- xc->regs.npc = xc->regs.pc+4;
-
- xc->func_exe_inst = funcExeInst;
-}
-
-// This function will probably mess things up unless the ROB is empty and
-// there are no instructions in the pipeline.
-void
-AlphaFullCPU::copyFromXC()
-{
- PhysRegIndex renamed_reg;
-
- // First loop through the integer registers.
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i)
- {
- renamed_reg = renameMap.lookup(i);
-
- DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, "
- "now has data %lli.\n",
- renamed_reg, regFile.intRegFile[renamed_reg],
- xc->regs.intRegFile[i]);
-
- regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i];
- }
-
- // Then loop through the floating point registers.
- for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
- {
- renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
- regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i];
- regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ;
- }
-
- // Then loop through the misc registers.
- regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr;
- regFile.miscRegs.uniq = xc->regs.miscRegs.uniq;
- regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag;
- regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr;
-
- // Then finally set the PC and the next PC.
-// regFile.pc = xc->regs.pc;
-// regFile.npc = xc->regs.npc;
-
- funcExeInst = xc->func_exe_inst;
-}
-
-#ifdef FULL_SYSTEM
-
-uint64_t *
-AlphaFullCPU::getIpr()
-{
- return regs.ipr;
-}
-
-uint64_t
-AlphaFullCPU::readIpr(int idx, Fault &fault)
-{
- uint64_t *ipr = getIpr();
- uint64_t retval = 0; // return value, default 0
-
- switch (idx) {
- case AlphaISA::IPR_PALtemp0:
- case AlphaISA::IPR_PALtemp1:
- case AlphaISA::IPR_PALtemp2:
- case AlphaISA::IPR_PALtemp3:
- case AlphaISA::IPR_PALtemp4:
- case AlphaISA::IPR_PALtemp5:
- case AlphaISA::IPR_PALtemp6:
- case AlphaISA::IPR_PALtemp7:
- case AlphaISA::IPR_PALtemp8:
- case AlphaISA::IPR_PALtemp9:
- case AlphaISA::IPR_PALtemp10:
- case AlphaISA::IPR_PALtemp11:
- case AlphaISA::IPR_PALtemp12:
- case AlphaISA::IPR_PALtemp13:
- case AlphaISA::IPR_PALtemp14:
- case AlphaISA::IPR_PALtemp15:
- case AlphaISA::IPR_PALtemp16:
- case AlphaISA::IPR_PALtemp17:
- case AlphaISA::IPR_PALtemp18:
- case AlphaISA::IPR_PALtemp19:
- case AlphaISA::IPR_PALtemp20:
- case AlphaISA::IPR_PALtemp21:
- case AlphaISA::IPR_PALtemp22:
- case AlphaISA::IPR_PALtemp23:
- case AlphaISA::IPR_PAL_BASE:
-
- case AlphaISA::IPR_IVPTBR:
- case AlphaISA::IPR_DC_MODE:
- case AlphaISA::IPR_MAF_MODE:
- case AlphaISA::IPR_ISR:
- case AlphaISA::IPR_EXC_ADDR:
- case AlphaISA::IPR_IC_PERR_STAT:
- case AlphaISA::IPR_DC_PERR_STAT:
- case AlphaISA::IPR_MCSR:
- case AlphaISA::IPR_ASTRR:
- case AlphaISA::IPR_ASTER:
- case AlphaISA::IPR_SIRR:
- case AlphaISA::IPR_ICSR:
- case AlphaISA::IPR_ICM:
- case AlphaISA::IPR_DTB_CM:
- case AlphaISA::IPR_IPLR:
- case AlphaISA::IPR_INTID:
- case AlphaISA::IPR_PMCTR:
- // no side-effect
- retval = ipr[idx];
- break;
-
- case AlphaISA::IPR_CC:
- retval |= ipr[idx] & ULL(0xffffffff00000000);
- retval |= curTick & ULL(0x00000000ffffffff);
- break;
-
- case AlphaISA::IPR_VA:
- retval = ipr[idx];
- break;
-
- case AlphaISA::IPR_VA_FORM:
- case AlphaISA::IPR_MM_STAT:
- case AlphaISA::IPR_IFAULT_VA_FORM:
- case AlphaISA::IPR_EXC_MASK:
- case AlphaISA::IPR_EXC_SUM:
- retval = ipr[idx];
- break;
-
- case AlphaISA::IPR_DTB_PTE:
- {
- AlphaISA::PTE &pte = dtb->index(!misspeculating());
-
- retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
- retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
- retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
- retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
- retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
- retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
- retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
- }
- break;
-
- // write only registers
- case AlphaISA::IPR_HWINT_CLR:
- case AlphaISA::IPR_SL_XMIT:
- case AlphaISA::IPR_DC_FLUSH:
- case AlphaISA::IPR_IC_FLUSH:
- case AlphaISA::IPR_ALT_MODE:
- case AlphaISA::IPR_DTB_IA:
- case AlphaISA::IPR_DTB_IAP:
- case AlphaISA::IPR_ITB_IA:
- case AlphaISA::IPR_ITB_IAP:
- fault = Unimplemented_Opcode_Fault;
- break;
-
- default:
- // invalid IPR
- fault = Unimplemented_Opcode_Fault;
- break;
- }
-
- return retval;
-}
-
-Fault
-AlphaFullCPU::setIpr(int idx, uint64_t val)
-{
- uint64_t *ipr = getIpr();
- uint64_t old;
-
- if (misspeculating())
- return No_Fault;
-
- switch (idx) {
- case AlphaISA::IPR_PALtemp0:
- case AlphaISA::IPR_PALtemp1:
- case AlphaISA::IPR_PALtemp2:
- case AlphaISA::IPR_PALtemp3:
- case AlphaISA::IPR_PALtemp4:
- case AlphaISA::IPR_PALtemp5:
- case AlphaISA::IPR_PALtemp6:
- case AlphaISA::IPR_PALtemp7:
- case AlphaISA::IPR_PALtemp8:
- case AlphaISA::IPR_PALtemp9:
- case AlphaISA::IPR_PALtemp10:
- case AlphaISA::IPR_PALtemp11:
- case AlphaISA::IPR_PALtemp12:
- case AlphaISA::IPR_PALtemp13:
- case AlphaISA::IPR_PALtemp14:
- case AlphaISA::IPR_PALtemp15:
- case AlphaISA::IPR_PALtemp16:
- case AlphaISA::IPR_PALtemp17:
- case AlphaISA::IPR_PALtemp18:
- case AlphaISA::IPR_PALtemp19:
- case AlphaISA::IPR_PALtemp20:
- case AlphaISA::IPR_PALtemp21:
- case AlphaISA::IPR_PALtemp22:
- case AlphaISA::IPR_PAL_BASE:
- case AlphaISA::IPR_IC_PERR_STAT:
- case AlphaISA::IPR_DC_PERR_STAT:
- case AlphaISA::IPR_PMCTR:
- // write entire quad w/ no side-effect
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_CC_CTL:
- // This IPR resets the cycle counter. We assume this only
- // happens once... let's verify that.
- assert(ipr[idx] == 0);
- ipr[idx] = 1;
- break;
-
- case AlphaISA::IPR_CC:
- // This IPR only writes the upper 64 bits. It's ok to write
- // all 64 here since we mask out the lower 32 in rpcc (see
- // isa_desc).
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_PALtemp23:
- // write entire quad w/ no side-effect
- old = ipr[idx];
- ipr[idx] = val;
- kernelStats.context(old, val);
- break;
-
- case AlphaISA::IPR_DTB_PTE:
- // write entire quad w/ no side-effect, tag is forthcoming
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_EXC_ADDR:
- // second least significant bit in PC is always zero
- ipr[idx] = val & ~2;
- break;
-
- case AlphaISA::IPR_ASTRR:
- case AlphaISA::IPR_ASTER:
- // only write least significant four bits - privilege mask
- ipr[idx] = val & 0xf;
- break;
-
- case AlphaISA::IPR_IPLR:
-#ifdef DEBUG
- if (break_ipl != -1 && break_ipl == (val & 0x1f))
- debug_break();
-#endif
-
- // only write least significant five bits - interrupt level
- ipr[idx] = val & 0x1f;
- kernelStats.swpipl(ipr[idx]);
- break;
-
- case AlphaISA::IPR_DTB_CM:
- kernelStats.mode((val & 0x18) != 0);
-
- case AlphaISA::IPR_ICM:
- // only write two mode bits - processor mode
- ipr[idx] = val & 0x18;
- break;
-
- case AlphaISA::IPR_ALT_MODE:
- // only write two mode bits - processor mode
- ipr[idx] = val & 0x18;
- break;
-
- case AlphaISA::IPR_MCSR:
- // more here after optimization...
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_SIRR:
- // only write software interrupt mask
- ipr[idx] = val & 0x7fff0;
- break;
-
- case AlphaISA::IPR_ICSR:
- ipr[idx] = val & ULL(0xffffff0300);
- break;
-
- case AlphaISA::IPR_IVPTBR:
- case AlphaISA::IPR_MVPTBR:
- ipr[idx] = val & ULL(0xffffffffc0000000);
- break;
-
- case AlphaISA::IPR_DC_TEST_CTL:
- ipr[idx] = val & 0x1ffb;
- break;
-
- case AlphaISA::IPR_DC_MODE:
- case AlphaISA::IPR_MAF_MODE:
- ipr[idx] = val & 0x3f;
- break;
-
- case AlphaISA::IPR_ITB_ASN:
- ipr[idx] = val & 0x7f0;
- break;
-
- case AlphaISA::IPR_DTB_ASN:
- ipr[idx] = val & ULL(0xfe00000000000000);
- break;
-
- case AlphaISA::IPR_EXC_SUM:
- case AlphaISA::IPR_EXC_MASK:
- // any write to this register clears it
- ipr[idx] = 0;
- break;
-
- case AlphaISA::IPR_INTID:
- case AlphaISA::IPR_SL_RCV:
- case AlphaISA::IPR_MM_STAT:
- case AlphaISA::IPR_ITB_PTE_TEMP:
- case AlphaISA::IPR_DTB_PTE_TEMP:
- // read-only registers
- return Unimplemented_Opcode_Fault;
-
- case AlphaISA::IPR_HWINT_CLR:
- case AlphaISA::IPR_SL_XMIT:
- case AlphaISA::IPR_DC_FLUSH:
- case AlphaISA::IPR_IC_FLUSH:
- // the following are write only
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_DTB_IA:
- // really a control write
- ipr[idx] = 0;
-
- dtb->flushAll();
- break;
-
- case AlphaISA::IPR_DTB_IAP:
- // really a control write
- ipr[idx] = 0;
-
- dtb->flushProcesses();
- break;
-
- case AlphaISA::IPR_DTB_IS:
- // really a control write
- ipr[idx] = val;
-
- dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
- break;
-
- case AlphaISA::IPR_DTB_TAG: {
- struct AlphaISA::PTE pte;
-
- // FIXME: granularity hints NYI...
- if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
- panic("PTE GH field != 0");
-
- // write entire quad
- ipr[idx] = val;
-
- // construct PTE for new entry
- pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
-
- // insert new TAG/PTE value into data TLB
- dtb->insert(val, pte);
- }
- break;
-
- case AlphaISA::IPR_ITB_PTE: {
- struct AlphaISA::PTE pte;
-
- // FIXME: granularity hints NYI...
- if (ITB_PTE_GH(val) != 0)
- panic("PTE GH field != 0");
-
- // write entire quad
- ipr[idx] = val;
-
- // construct PTE for new entry
- pte.ppn = ITB_PTE_PPN(val);
- pte.xre = ITB_PTE_XRE(val);
- pte.xwe = 0;
- pte.fonr = ITB_PTE_FONR(val);
- pte.fonw = ITB_PTE_FONW(val);
- pte.asma = ITB_PTE_ASMA(val);
- pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
-
- // insert new TAG/PTE value into data TLB
- itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
- }
- break;
-
- case AlphaISA::IPR_ITB_IA:
- // really a control write
- ipr[idx] = 0;
-
- itb->flushAll();
- break;
-
- case AlphaISA::IPR_ITB_IAP:
- // really a control write
- ipr[idx] = 0;
-
- itb->flushProcesses();
- break;
-
- case AlphaISA::IPR_ITB_IS:
- // really a control write
- ipr[idx] = val;
-
- itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
- break;
-
- default:
- // invalid IPR
- return Unimplemented_Opcode_Fault;
- }
-
- // no error...
- return No_Fault;
-
-}
-
-int
-AlphaFullCPU::readIntrFlag()
-{
- return regs.intrflag;
-}
-
-void
-AlphaFullCPU::setIntrFlag(int val)
-{
- regs.intrflag = val;
-}
-
-// Maybe have this send back from IEW stage to squash and update PC.
-Fault
-AlphaFullCPU::hwrei()
-{
- uint64_t *ipr = getIpr();
-
- if (!PC_PAL(regs.pc))
- return Unimplemented_Opcode_Fault;
-
- setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]);
-
- if (!misspeculating()) {
- kernelStats.hwrei();
-
- if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)
- AlphaISA::swap_palshadow(&regs, false);
-
- AlphaISA::check_interrupts = true;
- }
-
- // FIXME: XXX check for interrupts? XXX
- return No_Fault;
-}
-
-bool
-AlphaFullCPU::inPalMode()
-{
- return PC_PAL(readPC());
-}
-
-bool
-AlphaFullCPU::simPalCheck(int palFunc)
-{
- kernelStats.callpal(palFunc);
-
- switch (palFunc) {
- case PAL::halt:
- halt();
- if (--System::numSystemsRunning == 0)
- new SimExitEvent("all cpus halted");
- break;
-
- case PAL::bpt:
- case PAL::bugchk:
- if (system->breakpoint())
- return false;
- break;
- }
-
- return true;
-}
-
-// Probably shouldn't be able to switch to the trap handler as quickly as
-// this. Also needs to get the exception restart address from the commit
-// stage.
-void
-AlphaFullCPU::trap(Fault fault)
-{
- uint64_t PC = commit.readPC();
-
- DPRINTF(Fault, "Fault %s\n", FaultName(fault));
- Stats::recordEvent(csprintf("Fault %s", FaultName(fault)));
-
- assert(!misspeculating());
- kernelStats.fault(fault);
-
- if (fault == Arithmetic_Fault)
- panic("Arithmetic traps are unimplemented!");
-
- AlphaISA::InternalProcReg *ipr = getIpr();
-
- // exception restart address - Get the commit PC
- if (fault != Interrupt_Fault || !PC_PAL(PC))
- ipr[AlphaISA::IPR_EXC_ADDR] = PC;
-
- if (fault == Pal_Fault || fault == Arithmetic_Fault /* ||
- fault == Interrupt_Fault && !PC_PAL(regs.pc) */) {
- // traps... skip faulting instruction
- ipr[AlphaISA::IPR_EXC_ADDR] += 4;
- }
-
- if (!PC_PAL(PC))
- AlphaISA::swap_palshadow(&regs, true);
-
- setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] );
- setNextPC(PC + sizeof(MachInst));
-}
-
-void
-AlphaFullCPU::processInterrupts()
-{
- // Check for interrupts here. For now can copy the code that exists
- // within isa_fullsys_traits.hh.
-}
-
-// swap_palshadow swaps in the values of the shadow registers and
-// swaps them with the values of the physical registers that map to the
-// same logical index.
-void
-AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow)
-{
- if (palShadowEnabled == use_shadow)
- panic("swap_palshadow: wrong PAL shadow state");
-
- palShadowEnabled = use_shadow;
-
- // Will have to lookup in rename map to get physical registers, then
- // swap.
- for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
- if (reg_redir[i]) {
- AlphaISA::IntReg temp = regs->intRegFile[i];
- regs->intRegFile[i] = regs->palregs[i];
- regs->palregs[i] = temp;
- }
- }
-}
-
-#endif // FULL_SYSTEM
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU)
-
- Param<int> numThreads;
-
-#ifdef FULL_SYSTEM
-SimObjectParam<System *> system;
-SimObjectParam<AlphaITB *> itb;
-SimObjectParam<AlphaDTB *> dtb;
-Param<int> mult;
-#else
-SimObjectVectorParam<Process *> workload;
-SimObjectParam<Process *> process;
-Param<short> asid;
-#endif // FULL_SYSTEM
-SimObjectParam<FunctionalMemory *> mem;
-
-Param<Counter> max_insts_any_thread;
-Param<Counter> max_insts_all_threads;
-Param<Counter> max_loads_any_thread;
-Param<Counter> max_loads_all_threads;
-
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
-Param<unsigned> decodeToFetchDelay;
-Param<unsigned> renameToFetchDelay;
-Param<unsigned> iewToFetchDelay;
-Param<unsigned> commitToFetchDelay;
-Param<unsigned> fetchWidth;
-
-Param<unsigned> renameToDecodeDelay;
-Param<unsigned> iewToDecodeDelay;
-Param<unsigned> commitToDecodeDelay;
-Param<unsigned> fetchToDecodeDelay;
-Param<unsigned> decodeWidth;
-
-Param<unsigned> iewToRenameDelay;
-Param<unsigned> commitToRenameDelay;
-Param<unsigned> decodeToRenameDelay;
-Param<unsigned> renameWidth;
-
-Param<unsigned> commitToIEWDelay;
-Param<unsigned> renameToIEWDelay;
-Param<unsigned> issueToExecuteDelay;
-Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-
-Param<unsigned> iewToCommitDelay;
-Param<unsigned> renameToROBDelay;
-Param<unsigned> commitWidth;
-Param<unsigned> squashWidth;
-
-Param<unsigned> numPhysIntRegs;
-Param<unsigned> numPhysFloatRegs;
-Param<unsigned> numIQEntries;
-Param<unsigned> numROBEntries;
-
-Param<bool> defReg;
-
-END_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU)
-
- INIT_PARAM(numThreads, "number of HW thread contexts"),
-
-#ifdef FULL_SYSTEM
- INIT_PARAM(system, "System object"),
- INIT_PARAM(itb, "Instruction translation buffer"),
- INIT_PARAM(dtb, "Data translation buffer"),
- INIT_PARAM_DFLT(mult, "System clock multiplier", 1),
-#else
- INIT_PARAM(workload, "Processes to run"),
- INIT_PARAM_DFLT(process, "Process to run", NULL),
- INIT_PARAM(asid, "Address space ID"),
-#endif // FULL_SYSTEM
-
- INIT_PARAM_DFLT(mem, "Memory", NULL),
-
- INIT_PARAM_DFLT(max_insts_any_thread,
- "Terminate when any thread reaches this inst count",
- 0),
- INIT_PARAM_DFLT(max_insts_all_threads,
- "Terminate when all threads have reached"
- "this inst count",
- 0),
- INIT_PARAM_DFLT(max_loads_any_thread,
- "Terminate when any thread reaches this load count",
- 0),
- INIT_PARAM_DFLT(max_loads_all_threads,
- "Terminate when all threads have reached this load"
- "count",
- 0),
-
- INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
- INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
- INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
- INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
- INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
- "delay"),
- INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
- INIT_PARAM(fetchWidth, "Fetch width"),
-
- INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
- INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
- "delay"),
- INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
- INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
- INIT_PARAM(decodeWidth, "Decode width"),
-
- INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
- "delay"),
- INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
- INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
- INIT_PARAM(renameWidth, "Rename width"),
-
- INIT_PARAM(commitToIEWDelay, "Commit to "
- "Issue/Execute/Writeback delay"),
- INIT_PARAM(renameToIEWDelay, "Rename to "
- "Issue/Execute/Writeback delay"),
- INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
- "to the IEW stage)"),
- INIT_PARAM(issueWidth, "Issue width"),
- INIT_PARAM(executeWidth, "Execute width"),
- INIT_PARAM(executeIntWidth, "Integer execute width"),
- INIT_PARAM(executeFloatWidth, "Floating point execute width"),
-
- INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
- "delay"),
- INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
- INIT_PARAM(commitWidth, "Commit width"),
- INIT_PARAM(squashWidth, "Squash width"),
-
- INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
- INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
- "registers"),
- INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
- INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
-
- INIT_PARAM(defReg, "Defer registration")
-
-END_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU)
-
-CREATE_SIM_OBJECT(AlphaFullCPU)
-{
- AlphaFullCPU *cpu;
-
-#ifdef FULL_SYSTEM
- if (mult != 1)
- panic("Processor clock multiplier must be 1?\n");
-
- // Full-system only supports a single thread for the moment.
- int actual_num_threads = 1;
-#else
- // In non-full-system mode, we infer the number of threads from
- // the workload if it's not explicitly specified.
- int actual_num_threads =
- numThreads.isValid() ? numThreads : workload.size();
-
- if (workload.size() == 0) {
- fatal("Must specify at least one workload!");
- }
-
- Process *actual_process;
-
- if (process == NULL) {
- actual_process = workload[0];
- } else {
- actual_process = process;
- }
-
-#endif
-
- AlphaSimpleParams params;
-
- params.name = getInstanceName();
- params.numberOfThreads = actual_num_threads;
-
-#ifdef FULL_SYSTEM
- params._system = system;
- params.itb = itb;
- params.dtb = dtb;
- params.freq = ticksPerSecond * mult;
-#else
- params.workload = workload;
- params.process = actual_process;
- params.asid = asid;
-#endif // FULL_SYSTEM
-
- params.mem = mem;
-
- params.maxInstsAnyThread = max_insts_any_thread;
- params.maxInstsAllThreads = max_insts_all_threads;
- params.maxLoadsAnyThread = max_loads_any_thread;
- params.maxLoadsAllThreads = max_loads_all_threads;
-
- //
- // Caches
- //
- params.icacheInterface = icache ? icache->getInterface() : NULL;
- params.dcacheInterface = dcache ? dcache->getInterface() : NULL;
-
- params.decodeToFetchDelay = decodeToFetchDelay;
- params.renameToFetchDelay = renameToFetchDelay;
- params.iewToFetchDelay = iewToFetchDelay;
- params.commitToFetchDelay = commitToFetchDelay;
- params.fetchWidth = fetchWidth;
-
- params.renameToDecodeDelay = renameToDecodeDelay;
- params.iewToDecodeDelay = iewToDecodeDelay;
- params.commitToDecodeDelay = commitToDecodeDelay;
- params.fetchToDecodeDelay = fetchToDecodeDelay;
- params.decodeWidth = decodeWidth;
-
- params.iewToRenameDelay = iewToRenameDelay;
- params.commitToRenameDelay = commitToRenameDelay;
- params.decodeToRenameDelay = decodeToRenameDelay;
- params.renameWidth = renameWidth;
-
- params.commitToIEWDelay = commitToIEWDelay;
- params.renameToIEWDelay = renameToIEWDelay;
- params.issueToExecuteDelay = issueToExecuteDelay;
- params.issueWidth = issueWidth;
- params.executeWidth = executeWidth;
- params.executeIntWidth = executeIntWidth;
- params.executeFloatWidth = executeFloatWidth;
-
- params.iewToCommitDelay = iewToCommitDelay;
- params.renameToROBDelay = renameToROBDelay;
- params.commitWidth = commitWidth;
- params.squashWidth = squashWidth;
-
- params.numPhysIntRegs = numPhysIntRegs;
- params.numPhysFloatRegs = numPhysFloatRegs;
- params.numIQEntries = numIQEntries;
- params.numROBEntries = numROBEntries;
-
- params.defReg = defReg;
-
- cpu = new AlphaFullCPU(params);
-
- return cpu;
-}
-
-REGISTER_SIM_OBJECT("AlphaFullCPU", AlphaFullCPU)
-
+#include "cpu/beta_cpu/alpha_impl.hh"
+#include "cpu/beta_cpu/alpha_full_cpu_impl.hh"
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+
+// Force instantiation of AlphaFullCPU for all the implementations that are
+// needed. Consider merging this and alpha_dyn_inst.cc, and maybe all
+// classes that depend on a certain impl, into one file (alpha_impl.cc?).
+// Note: an explicit instantiation of a class template must spell out the
+// class-key ("template class ..."); the form without it is non-standard.
+template class AlphaFullCPU<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh
index b098aaac1..0e094b122 100644
--- a/cpu/beta_cpu/alpha_full_cpu.hh
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@@ -6,18 +6,19 @@
#ifndef __ALPHA_FULL_CPU_HH__
#define __ALPHA_FULL_CPU_HH__
-// To include: comm, impl, full cpu, ITB/DTB if full sys,
-#include "cpu/beta_cpu/comm.hh"
-#include "cpu/beta_cpu/alpha_impl.hh"
+// To include: comm, full cpu, ITB/DTB if full sys,
+//#include "cpu/beta_cpu/comm.hh"
+//#include "cpu/beta_cpu/alpha_impl.hh"
#include "cpu/beta_cpu/full_cpu.hh"
using namespace std;
-class AlphaFullCPU : public FullBetaCPU<AlphaSimpleImpl>
+template <class Impl>
+class AlphaFullCPU : public FullBetaCPU<Impl>
{
public:
- typedef AlphaSimpleImpl::ISA AlphaISA;
- typedef AlphaSimpleImpl::Params Params;
+ typedef typename Impl::ISA AlphaISA;
+ typedef typename Impl::Params Params;
public:
AlphaFullCPU(Params &params);
diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc
new file mode 100644
index 000000000..5fe96d656
--- /dev/null
+++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc
@@ -0,0 +1,306 @@
+#include "cpu/beta_cpu/alpha_impl.hh"
+#include "cpu/beta_cpu/alpha_full_cpu.hh"
+
+#include "mem/cache/base_cache.hh"
+
+#include "base/inifile.hh"
+#include "base/loader/symtab.hh"
+#include "base/misc.hh"
+#include "cpu/base_cpu.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "sim/builder.hh"
+#include "sim/debug.hh"
+#include "sim/host.hh"
+#include "sim/process.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#ifdef FULL_SYSTEM
+#include "base/remote_gdb.hh"
+#include "dev/alpha_access.h"
+#include "dev/pciareg.h"
+#include "mem/functional_mem/memory_control.hh"
+#include "mem/functional_mem/physical_memory.hh"
+#include "sim/system.hh"
+#include "targetarch/alpha_memory.hh"
+#include "targetarch/vtophys.hh"
+#else // !FULL_SYSTEM
+#include "eio/eio.hh"
+#include "mem/functional_mem/functional_memory.hh"
+#endif // FULL_SYSTEM
+
+// Declares every configuration parameter read by the builder for the
+// sim object registered as "AlphaFullCPU". Each parameter receives its
+// description (and default, where it has one) in the INIT block that follows.
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+
+ Param<int> numThreads;
+
+// Full-system builds take a System plus ITB/DTB; otherwise the CPU is
+// given processes to run and an address space ID.
+#ifdef FULL_SYSTEM
+SimObjectParam<System *> system;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+Param<int> mult;
+#else
+SimObjectVectorParam<Process *> workload;
+SimObjectParam<Process *> process;
+Param<short> asid;
+#endif // FULL_SYSTEM
+SimObjectParam<FunctionalMemory *> mem;
+
+// Termination thresholds (instruction / load counts).
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+// L1 caches.
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
+// Delays into fetch, and fetch width.
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+// Delays into decode, and decode width.
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+// Delays into rename, and rename width.
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+// Delays into issue/execute/writeback (IEW), and its widths.
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+
+// Delays into commit / the reorder buffer, and commit/squash widths.
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+// Branch predictor and BTB sizing.
+Param<unsigned> localPredictorSize;
+Param<unsigned> localPredictorCtrBits;
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+// Physical register file, instruction queue and ROB sizing.
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defReg;
+
+END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+
+// Gives each declared parameter its description string and, for the
+// INIT_PARAM_DFLT entries, a default value. Descriptions that are split
+// across adjacent string literals carry an explicit space at the split so
+// the concatenated text reads correctly.
+BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#ifdef FULL_SYSTEM
+    INIT_PARAM(system, "System object"),
+    INIT_PARAM(itb, "Instruction translation buffer"),
+    INIT_PARAM(dtb, "Data translation buffer"),
+    INIT_PARAM_DFLT(mult, "System clock multiplier", 1),
+#else
+    INIT_PARAM(workload, "Processes to run"),
+    INIT_PARAM_DFLT(process, "Process to run", NULL),
+    INIT_PARAM(asid, "Address space ID"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+               "to the IEW stage)"),
+    INIT_PARAM(issueWidth, "Issue width"),
+    INIT_PARAM(executeWidth, "Execute width"),
+    INIT_PARAM(executeIntWidth, "Integer execute width"),
+    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+
+    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+               "delay"),
+    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+    INIT_PARAM(commitWidth, "Commit width"),
+    INIT_PARAM(squashWidth, "Squash width"),
+
+    INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. "
+               "Must be a power of 2."),
+    INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"),
+    INIT_PARAM(BTBEntries, "Number of BTB entries"),
+    INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+
+    INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+    INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+               "registers"),
+    INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+
+    INIT_PARAM(defReg, "Defer registration")
+
+END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+
+// Builds an AlphaFullCPU<AlphaSimpleImpl>: copies every configuration
+// parameter into an AlphaSimpleParams struct and constructs the CPU from it.
+// Returns the newly allocated CPU.
+CREATE_SIM_OBJECT(BaseFullCPU)
+{
+#ifdef FULL_SYSTEM
+    if (mult != 1)
+        panic("Processor clock multiplier must be 1?\n");
+
+    // Full-system only supports a single thread for the moment.
+    int actual_num_threads = 1;
+#else
+    // Reject an empty workload before anything below depends on its size
+    // or indexes into it.
+    if (workload.size() == 0) {
+        fatal("Must specify at least one workload!");
+    }
+
+    // In non-full-system mode, we infer the number of threads from
+    // the workload if it's not explicitly specified.
+    int actual_num_threads =
+        numThreads.isValid() ? numThreads : workload.size();
+
+    // An explicitly given process wins; otherwise use the first workload.
+    Process *actual_process;
+
+    if (process == NULL) {
+        actual_process = workload[0];
+    } else {
+        actual_process = process;
+    }
+
+#endif
+
+    AlphaSimpleParams params;
+
+    params.name = getInstanceName();
+    params.numberOfThreads = actual_num_threads;
+
+#ifdef FULL_SYSTEM
+    params._system = system;
+    params.itb = itb;
+    params.dtb = dtb;
+    params.freq = ticksPerSecond * mult;
+#else
+    params.workload = workload;
+    params.process = actual_process;
+    params.asid = asid;
+#endif // FULL_SYSTEM
+
+    params.mem = mem;
+
+    params.maxInstsAnyThread = max_insts_any_thread;
+    params.maxInstsAllThreads = max_insts_all_threads;
+    params.maxLoadsAnyThread = max_loads_any_thread;
+    params.maxLoadsAllThreads = max_loads_all_threads;
+
+    //
+    // Caches
+    //
+    params.icacheInterface = icache ? icache->getInterface() : NULL;
+    params.dcacheInterface = dcache ? dcache->getInterface() : NULL;
+
+    params.decodeToFetchDelay = decodeToFetchDelay;
+    params.renameToFetchDelay = renameToFetchDelay;
+    params.iewToFetchDelay = iewToFetchDelay;
+    params.commitToFetchDelay = commitToFetchDelay;
+    params.fetchWidth = fetchWidth;
+
+    params.renameToDecodeDelay = renameToDecodeDelay;
+    params.iewToDecodeDelay = iewToDecodeDelay;
+    params.commitToDecodeDelay = commitToDecodeDelay;
+    params.fetchToDecodeDelay = fetchToDecodeDelay;
+    params.decodeWidth = decodeWidth;
+
+    params.iewToRenameDelay = iewToRenameDelay;
+    params.commitToRenameDelay = commitToRenameDelay;
+    params.decodeToRenameDelay = decodeToRenameDelay;
+    params.renameWidth = renameWidth;
+
+    params.commitToIEWDelay = commitToIEWDelay;
+    params.renameToIEWDelay = renameToIEWDelay;
+    params.issueToExecuteDelay = issueToExecuteDelay;
+    params.issueWidth = issueWidth;
+    params.executeWidth = executeWidth;
+    params.executeIntWidth = executeIntWidth;
+    params.executeFloatWidth = executeFloatWidth;
+
+    params.iewToCommitDelay = iewToCommitDelay;
+    params.renameToROBDelay = renameToROBDelay;
+    params.commitWidth = commitWidth;
+    params.squashWidth = squashWidth;
+
+    params.localPredictorSize = localPredictorSize;
+    params.localPredictorCtrBits = localPredictorCtrBits;
+    params.BTBEntries = BTBEntries;
+    params.BTBTagSize = BTBTagSize;
+
+    params.numPhysIntRegs = numPhysIntRegs;
+    params.numPhysFloatRegs = numPhysFloatRegs;
+    params.numIQEntries = numIQEntries;
+    params.numROBEntries = numROBEntries;
+
+    // Honor the configured shift amount instead of silently overriding the
+    // (required) instShiftAmt parameter with a hard-coded constant.
+    params.instShiftAmt = instShiftAmt;
+
+    params.defReg = defReg;
+
+    return new AlphaFullCPU<AlphaSimpleImpl>(params);
+}
+
+REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU)
+
diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh
new file mode 100644
index 000000000..8bfc0777e
--- /dev/null
+++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh
@@ -0,0 +1,690 @@
+
+#include "base/cprintf.hh"
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "mem/cache/cache.hh" // for dynamic cast
+#include "mem/mem_interface.hh"
+#include "sim/builder.hh"
+#include "sim/sim_events.hh"
+#include "sim/stats.hh"
+
+#include "cpu/beta_cpu/alpha_full_cpu.hh"
+#include "cpu/beta_cpu/alpha_params.hh"
+#include "cpu/beta_cpu/comm.hh"
+
+// Constructs the CPU: forwards params to the FullBetaCPU<Impl> base class,
+// then hands each pipeline stage and the ROB a pointer back to this CPU.
+template <class Impl>
+AlphaFullCPU<Impl>::AlphaFullCPU(Params &params)
+    // The base must be named with Impl (not a fixed implementation) so the
+    // initializer matches the actual base class for every instantiation.
+    : FullBetaCPU<Impl>(params)
+{
+    DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
+
+    fetch.setCPU(this);
+    decode.setCPU(this);
+    rename.setCPU(this);
+    iew.setCPU(this);
+    commit.setCPU(this);
+
+    rob.setCPU(this);
+}
+
+#ifndef FULL_SYSTEM
+
+// Services a system call (non-full-system builds only): ticks commit,
+// squashes the pipeline stages, copies register state into the execution
+// context, lets the process handle the syscall, and copies the state back.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::syscall()
+{
+ DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n");
+
+ // Commit stage needs to run as well.
+ commit.tick();
+
+ squashStages();
+
+ // Temporarily increase this by one to account for the syscall
+ // instruction.
+ ++funcExeInst;
+
+ // Copy over all important state to xc once all the unrolling is done.
+ copyToXC();
+
+ process->syscall(xc);
+
+ // Copy over all important state back to CPU.
+ copyFromXC();
+
+ // Decrease funcExeInst by one as the normal commit will handle
+ // incrementing it.
+ --funcExeInst;
+}
+
+// This is not a pretty function, and should only be used if it is necessary
+// to fake having everything squash all at once (ie for non-full system
+// syscalls). Maybe put this at the FullCPU level?
+// It squashes fetch, decode, rename and IEW in turn, advancing each stage's
+// queue after the squash, then squashes the ROB back to its head sequence
+// number and marks commit as squashing.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::squashStages()
+{
+ InstSeqNum rob_head = rob.readHeadSeqNum();
+
+ // Now hack the time buffer to put this sequence number in the places
+ // where the stages might read it.
+ // NOTE(review): the bound of 5 is hard-coded; presumably it covers every
+ // time-buffer slot a stage can read -- confirm against the buffer size.
+ for (int i = 0; i < 5; ++i)
+ {
+ timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head;
+ }
+
+ fetch.squash(rob.readHeadNextPC());
+ fetchQueue.advance();
+
+ decode.squash();
+ decodeQueue.advance();
+
+ // NOTE(review): renameQueue is advanced twice (as iewQueue is below);
+ // the reason is not stated for rename -- confirm.
+ rename.squash();
+ renameQueue.advance();
+ renameQueue.advance();
+
+ // Be sure to advance the IEW queues so that the commit stage doesn't
+ // try to set an instruction as completed at the same time that it
+ // might be deleting it.
+ iew.squash();
+ iewQueue.advance();
+ iewQueue.advance();
+
+ rob.squash(rob_head);
+ commit.setSquashing();
+}
+
+#endif // FULL_SYSTEM
+
+// Copies the CPU's register state into the execution context (xc): every
+// architectural integer and floating point register is read through the
+// rename map from the physical register file, followed by the misc
+// registers, the PC/next PC taken from the ROB head, and funcExeInst.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::copyToXC()
+{
+    // Integer registers: architectural index -> physical register -> xc.
+    for (int arch_idx = 0; arch_idx < AlphaISA::NumIntRegs; ++arch_idx) {
+        PhysRegIndex phys_idx = renameMap.lookup(arch_idx);
+
+        xc->regs.intRegFile[arch_idx] = regFile.intRegFile[phys_idx];
+        DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n",
+                phys_idx, regFile.intRegFile[phys_idx]);
+    }
+
+    // Floating point registers live after FP_Base_DepTag in the rename map.
+    for (int arch_idx = 0; arch_idx < AlphaISA::NumFloatRegs; ++arch_idx) {
+        PhysRegIndex phys_idx =
+            renameMap.lookup(arch_idx + AlphaISA::FP_Base_DepTag);
+
+        xc->regs.floatRegFile.d[arch_idx] = regFile.floatRegFile[phys_idx].d;
+        xc->regs.floatRegFile.q[arch_idx] = regFile.floatRegFile[phys_idx].q;
+    }
+
+    // Misc registers are copied one field at a time.
+    xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr;
+    xc->regs.miscRegs.uniq = regFile.miscRegs.uniq;
+    xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag;
+    xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr;
+
+    // The PC comes from the head of the ROB; the next PC is the PC plus 4.
+    xc->regs.pc = rob.readHeadPC();
+    xc->regs.npc = xc->regs.pc + 4;
+
+    xc->func_exe_inst = funcExeInst;
+}
+
+// Copies register state from the execution context (xc) back into the CPU.
+// This function will probably mess things up unless the ROB is empty and
+// there are no instructions in the pipeline.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::copyFromXC()
+{
+    // Integer registers: xc -> physical register found via the rename map.
+    for (int arch_idx = 0; arch_idx < AlphaISA::NumIntRegs; ++arch_idx) {
+        PhysRegIndex phys_idx = renameMap.lookup(arch_idx);
+
+        DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, "
+                "now has data %lli.\n",
+                phys_idx, regFile.intRegFile[phys_idx],
+                xc->regs.intRegFile[arch_idx]);
+
+        regFile.intRegFile[phys_idx] = xc->regs.intRegFile[arch_idx];
+    }
+
+    // Floating point registers live after FP_Base_DepTag in the rename map.
+    for (int arch_idx = 0; arch_idx < AlphaISA::NumFloatRegs; ++arch_idx) {
+        PhysRegIndex phys_idx =
+            renameMap.lookup(arch_idx + AlphaISA::FP_Base_DepTag);
+
+        regFile.floatRegFile[phys_idx].d = xc->regs.floatRegFile.d[arch_idx];
+        regFile.floatRegFile[phys_idx].q = xc->regs.floatRegFile.q[arch_idx];
+    }
+
+    // Misc registers are copied one field at a time.
+    regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr;
+    regFile.miscRegs.uniq = xc->regs.miscRegs.uniq;
+    regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag;
+    regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr;
+
+    // The PC and the next PC are currently NOT copied back:
+//    regFile.pc = xc->regs.pc;
+//    regFile.npc = xc->regs.npc;
+
+    funcExeInst = xc->func_exe_inst;
+}
+
+#ifdef FULL_SYSTEM
+
+// Returns a pointer to the internal processor register (IPR) array held in
+// regs; callers index it with the AlphaISA::IPR_* constants.
+template <class Impl>
+uint64_t *
+AlphaFullCPU<Impl>::getIpr()
+{
+ return regs.ipr;
+}
+
+// Reads internal processor register idx and returns its value.
+// Most registers simply return the stored value. IPR_CC combines the stored
+// upper 32 bits with the low 32 bits of curTick, and IPR_DTB_PTE is rebuilt
+// from the DTB's current entry. Write-only and unknown registers leave the
+// return value at 0 and set fault to Unimplemented_Opcode_Fault; fault is
+// not written on any other path.
+template <class Impl>
+uint64_t
+AlphaFullCPU<Impl>::readIpr(int idx, Fault &fault)
+{
+    uint64_t *ipr = getIpr();
+    uint64_t retval = 0;    // stays 0 when the read faults
+
+    switch (idx) {
+      // Plain reads: return the stored value, no side-effect.
+      case AlphaISA::IPR_PALtemp0:
+      case AlphaISA::IPR_PALtemp1:
+      case AlphaISA::IPR_PALtemp2:
+      case AlphaISA::IPR_PALtemp3:
+      case AlphaISA::IPR_PALtemp4:
+      case AlphaISA::IPR_PALtemp5:
+      case AlphaISA::IPR_PALtemp6:
+      case AlphaISA::IPR_PALtemp7:
+      case AlphaISA::IPR_PALtemp8:
+      case AlphaISA::IPR_PALtemp9:
+      case AlphaISA::IPR_PALtemp10:
+      case AlphaISA::IPR_PALtemp11:
+      case AlphaISA::IPR_PALtemp12:
+      case AlphaISA::IPR_PALtemp13:
+      case AlphaISA::IPR_PALtemp14:
+      case AlphaISA::IPR_PALtemp15:
+      case AlphaISA::IPR_PALtemp16:
+      case AlphaISA::IPR_PALtemp17:
+      case AlphaISA::IPR_PALtemp18:
+      case AlphaISA::IPR_PALtemp19:
+      case AlphaISA::IPR_PALtemp20:
+      case AlphaISA::IPR_PALtemp21:
+      case AlphaISA::IPR_PALtemp22:
+      case AlphaISA::IPR_PALtemp23:
+      case AlphaISA::IPR_PAL_BASE:
+      case AlphaISA::IPR_IVPTBR:
+      case AlphaISA::IPR_DC_MODE:
+      case AlphaISA::IPR_MAF_MODE:
+      case AlphaISA::IPR_ISR:
+      case AlphaISA::IPR_EXC_ADDR:
+      case AlphaISA::IPR_IC_PERR_STAT:
+      case AlphaISA::IPR_DC_PERR_STAT:
+      case AlphaISA::IPR_MCSR:
+      case AlphaISA::IPR_ASTRR:
+      case AlphaISA::IPR_ASTER:
+      case AlphaISA::IPR_SIRR:
+      case AlphaISA::IPR_ICSR:
+      case AlphaISA::IPR_ICM:
+      case AlphaISA::IPR_DTB_CM:
+      case AlphaISA::IPR_IPLR:
+      case AlphaISA::IPR_INTID:
+      case AlphaISA::IPR_PMCTR:
+      case AlphaISA::IPR_VA:
+      case AlphaISA::IPR_VA_FORM:
+      case AlphaISA::IPR_MM_STAT:
+      case AlphaISA::IPR_IFAULT_VA_FORM:
+      case AlphaISA::IPR_EXC_MASK:
+      case AlphaISA::IPR_EXC_SUM:
+        retval = ipr[idx];
+        break;
+
+      // Cycle counter: stored upper half, current tick in the lower half.
+      case AlphaISA::IPR_CC:
+        retval |= ipr[idx] & ULL(0xffffffff00000000);
+        retval |= curTick & ULL(0x00000000ffffffff);
+        break;
+
+      // Assemble the PTE image field by field from the DTB entry.
+      case AlphaISA::IPR_DTB_PTE:
+        {
+            AlphaISA::PTE &entry = dtb->index(!misspeculating());
+
+            retval |= (static_cast<uint64_t>(entry.ppn) & ULL(0x7ffffff)) << 32;
+            retval |= (static_cast<uint64_t>(entry.xre) & ULL(0xf)) << 8;
+            retval |= (static_cast<uint64_t>(entry.xwe) & ULL(0xf)) << 12;
+            retval |= (static_cast<uint64_t>(entry.fonr) & ULL(0x1)) << 1;
+            retval |= (static_cast<uint64_t>(entry.fonw) & ULL(0x1)) << 2;
+            retval |= (static_cast<uint64_t>(entry.asma) & ULL(0x1)) << 4;
+            retval |= (static_cast<uint64_t>(entry.asn) & ULL(0x7f)) << 57;
+        }
+        break;
+
+      // write only registers, and any invalid IPR, cannot be read
+      case AlphaISA::IPR_HWINT_CLR:
+      case AlphaISA::IPR_SL_XMIT:
+      case AlphaISA::IPR_DC_FLUSH:
+      case AlphaISA::IPR_IC_FLUSH:
+      case AlphaISA::IPR_ALT_MODE:
+      case AlphaISA::IPR_DTB_IA:
+      case AlphaISA::IPR_DTB_IAP:
+      case AlphaISA::IPR_ITB_IA:
+      case AlphaISA::IPR_ITB_IAP:
+      default:
+        fault = Unimplemented_Opcode_Fault;
+        break;
+    }
+
+    return retval;
+}
+
+// Writes val to internal processor register idx, applying the
+// register-specific mask and side effects (kernel statistics updates, TLB
+// flushes and TLB inserts). Returns No_Fault without writing anything when
+// misspeculating, Unimplemented_Opcode_Fault for read-only or unknown
+// registers, and No_Fault otherwise.
+template <class Impl>
+Fault
+AlphaFullCPU<Impl>::setIpr(int idx, uint64_t val)
+{
+ uint64_t *ipr = getIpr();
+ uint64_t old;
+
+ if (misspeculating())
+ return No_Fault;
+
+ switch (idx) {
+ case AlphaISA::IPR_PALtemp0:
+ case AlphaISA::IPR_PALtemp1:
+ case AlphaISA::IPR_PALtemp2:
+ case AlphaISA::IPR_PALtemp3:
+ case AlphaISA::IPR_PALtemp4:
+ case AlphaISA::IPR_PALtemp5:
+ case AlphaISA::IPR_PALtemp6:
+ case AlphaISA::IPR_PALtemp7:
+ case AlphaISA::IPR_PALtemp8:
+ case AlphaISA::IPR_PALtemp9:
+ case AlphaISA::IPR_PALtemp10:
+ case AlphaISA::IPR_PALtemp11:
+ case AlphaISA::IPR_PALtemp12:
+ case AlphaISA::IPR_PALtemp13:
+ case AlphaISA::IPR_PALtemp14:
+ case AlphaISA::IPR_PALtemp15:
+ case AlphaISA::IPR_PALtemp16:
+ case AlphaISA::IPR_PALtemp17:
+ case AlphaISA::IPR_PALtemp18:
+ case AlphaISA::IPR_PALtemp19:
+ case AlphaISA::IPR_PALtemp20:
+ case AlphaISA::IPR_PALtemp21:
+ case AlphaISA::IPR_PALtemp22:
+ case AlphaISA::IPR_PAL_BASE:
+ case AlphaISA::IPR_IC_PERR_STAT:
+ case AlphaISA::IPR_DC_PERR_STAT:
+ case AlphaISA::IPR_PMCTR:
+ // write entire quad w/ no side-effect
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_CC_CTL:
+ // This IPR resets the cycle counter. We assume this only
+ // happens once... let's verify that.
+ assert(ipr[idx] == 0);
+ ipr[idx] = 1;
+ break;
+
+ case AlphaISA::IPR_CC:
+ // This IPR only writes the upper 32 bits. It's ok to write
+ // all 64 here since we mask out the lower 32 in rpcc (see
+ // isa_desc).
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_PALtemp23:
+ // write entire quad; the old and new values are also reported to
+ // kernelStats.context()
+ old = ipr[idx];
+ ipr[idx] = val;
+ kernelStats.context(old, val);
+ break;
+
+ case AlphaISA::IPR_DTB_PTE:
+ // write entire quad w/ no side-effect, tag is forthcoming
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_EXC_ADDR:
+ // second least significant bit in PC is always zero
+ ipr[idx] = val & ~2;
+ break;
+
+ case AlphaISA::IPR_ASTRR:
+ case AlphaISA::IPR_ASTER:
+ // only write least significant four bits - privilege mask
+ ipr[idx] = val & 0xf;
+ break;
+
+ case AlphaISA::IPR_IPLR:
+#ifdef DEBUG
+ if (break_ipl != -1 && break_ipl == (val & 0x1f))
+ debug_break();
+#endif
+
+ // only write least significant five bits - interrupt level
+ ipr[idx] = val & 0x1f;
+ kernelStats.swpipl(ipr[idx]);
+ break;
+
+ case AlphaISA::IPR_DTB_CM:
+ kernelStats.mode((val & 0x18) != 0);
+ // no break: falls through so DTB_CM stores its mode bits like ICM
+
+ case AlphaISA::IPR_ICM:
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
+
+ case AlphaISA::IPR_ALT_MODE:
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
+
+ case AlphaISA::IPR_MCSR:
+ // more here after optimization...
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_SIRR:
+ // only write software interrupt mask
+ ipr[idx] = val & 0x7fff0;
+ break;
+
+ case AlphaISA::IPR_ICSR:
+ ipr[idx] = val & ULL(0xffffff0300);
+ break;
+
+ case AlphaISA::IPR_IVPTBR:
+ case AlphaISA::IPR_MVPTBR:
+ ipr[idx] = val & ULL(0xffffffffc0000000);
+ break;
+
+ case AlphaISA::IPR_DC_TEST_CTL:
+ ipr[idx] = val & 0x1ffb;
+ break;
+
+ case AlphaISA::IPR_DC_MODE:
+ case AlphaISA::IPR_MAF_MODE:
+ ipr[idx] = val & 0x3f;
+ break;
+
+ case AlphaISA::IPR_ITB_ASN:
+ ipr[idx] = val & 0x7f0;
+ break;
+
+ case AlphaISA::IPR_DTB_ASN:
+ ipr[idx] = val & ULL(0xfe00000000000000);
+ break;
+
+ case AlphaISA::IPR_EXC_SUM:
+ case AlphaISA::IPR_EXC_MASK:
+ // any write to this register clears it
+ ipr[idx] = 0;
+ break;
+
+ case AlphaISA::IPR_INTID:
+ case AlphaISA::IPR_SL_RCV:
+ case AlphaISA::IPR_MM_STAT:
+ case AlphaISA::IPR_ITB_PTE_TEMP:
+ case AlphaISA::IPR_DTB_PTE_TEMP:
+ // read-only registers
+ return Unimplemented_Opcode_Fault;
+
+ case AlphaISA::IPR_HWINT_CLR:
+ case AlphaISA::IPR_SL_XMIT:
+ case AlphaISA::IPR_DC_FLUSH:
+ case AlphaISA::IPR_IC_FLUSH:
+ // the following are write only
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_DTB_IA:
+ // really a control write
+ ipr[idx] = 0;
+
+ dtb->flushAll();
+ break;
+
+ case AlphaISA::IPR_DTB_IAP:
+ // really a control write
+ ipr[idx] = 0;
+
+ dtb->flushProcesses();
+ break;
+
+ case AlphaISA::IPR_DTB_IS:
+ // really a control write
+ ipr[idx] = val;
+
+ dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
+ break;
+
+ case AlphaISA::IPR_DTB_TAG: {
+ struct AlphaISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
+
+ // insert new TAG/PTE value into data TLB
+ dtb->insert(val, pte);
+ }
+ break;
+
+ case AlphaISA::IPR_ITB_PTE: {
+ struct AlphaISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (ITB_PTE_GH(val) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = ITB_PTE_PPN(val);
+ pte.xre = ITB_PTE_XRE(val);
+ pte.xwe = 0;
+ pte.fonr = ITB_PTE_FONR(val);
+ pte.fonw = ITB_PTE_FONW(val);
+ pte.asma = ITB_PTE_ASMA(val);
+ pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
+
+ // insert new TAG/PTE value into instruction TLB
+ itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
+ }
+ break;
+
+ case AlphaISA::IPR_ITB_IA:
+ // really a control write
+ ipr[idx] = 0;
+
+ itb->flushAll();
+ break;
+
+ case AlphaISA::IPR_ITB_IAP:
+ // really a control write
+ ipr[idx] = 0;
+
+ itb->flushProcesses();
+ break;
+
+ case AlphaISA::IPR_ITB_IS:
+ // really a control write
+ ipr[idx] = val;
+
+ itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
+ break;
+
+ default:
+ // invalid IPR
+ return Unimplemented_Opcode_Fault;
+ }
+
+ // no error...
+ return No_Fault;
+
+}
+
+// Returns the current value of the interrupt flag stored in regs.
+template <class Impl>
+int
+AlphaFullCPU<Impl>::readIntrFlag()
+{
+ return regs.intrflag;
+}
+
+// Stores val as the interrupt flag in regs.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::setIntrFlag(int val)
+{
+ regs.intrflag = val;
+}
+
+// Handles the hw_rei instruction: sets the next PC to the address stored in
+// IPR_EXC_ADDR. Returns Unimplemented_Opcode_Fault when regs.pc is not a
+// PAL-mode PC, and No_Fault otherwise. When not misspeculating it also
+// records the event in kernelStats, swaps out the PAL shadow registers if
+// the low bit of the return address is clear, and requests an interrupt
+// check.
+// Maybe have this send back from IEW stage to squash and update PC.
+template <class Impl>
+Fault
+AlphaFullCPU<Impl>::hwrei()
+{
+    // Only legal while the PC is in PAL mode.
+    if (!PC_PAL(regs.pc))
+        return Unimplemented_Opcode_Fault;
+
+    uint64_t *ipr = getIpr();
+
+    setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]);
+
+    // Speculative executions stop here, before any bookkeeping.
+    if (misspeculating())
+        return No_Fault;
+
+    kernelStats.hwrei();
+
+    const bool low_bit_clear = (ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0;
+    if (low_bit_clear)
+        AlphaISA::swap_palshadow(&regs, false);
+
+    AlphaISA::check_interrupts = true;
+
+    // FIXME: XXX check for interrupts? XXX
+    return No_Fault;
+}
+
+// Returns whether the PC reported by readPC() satisfies PC_PAL, i.e.
+// whether the CPU is currently in PAL mode.
+template <class Impl>
+bool
+AlphaFullCPU<Impl>::inPalMode()
+{
+ return PC_PAL(readPC());
+}
+
+// Records the PAL call in kernelStats and applies the simulator-side
+// handling for it. PAL::halt halts this CPU and, once no systems are left
+// running, creates a SimExitEvent. PAL::bpt and PAL::bugchk return false
+// when system->breakpoint() reports true. Every other case returns true.
+template <class Impl>
+bool
+AlphaFullCPU<Impl>::simPalCheck(int palFunc)
+{
+    kernelStats.callpal(palFunc);
+
+    if (palFunc == PAL::halt) {
+        halt();
+        if (--System::numSystemsRunning == 0)
+            new SimExitEvent("all cpus halted");
+    } else if (palFunc == PAL::bpt || palFunc == PAL::bugchk) {
+        if (system->breakpoint())
+            return false;
+    }
+
+    return true;
+}
+
+// Probably shouldn't be able to switch to the trap handler as quickly as
+// this. Also needs to get the exception restart address from the commit
+// stage.
+// Redirects the CPU to the PAL handler for fault: records the fault, saves
+// the restart address in IPR_EXC_ADDR, swaps in the PAL shadow registers
+// when the commit PC is not already a PAL-mode PC, and sets the PC to
+// IPR_PAL_BASE plus the fault's entry in AlphaISA::fault_addr.
+// Panics on Arithmetic_Fault; asserts that the CPU is not misspeculating.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::trap(Fault fault)
+{
+ uint64_t PC = commit.readPC();
+
+ DPRINTF(Fault, "Fault %s\n", FaultName(fault));
+ Stats::recordEvent(csprintf("Fault %s", FaultName(fault)));
+
+ assert(!misspeculating());
+ kernelStats.fault(fault);
+
+ if (fault == Arithmetic_Fault)
+ panic("Arithmetic traps are unimplemented!");
+
+ AlphaISA::InternalProcReg *ipr = getIpr();
+
+ // exception restart address - Get the commit PC
+ if (fault != Interrupt_Fault || !PC_PAL(PC))
+ ipr[AlphaISA::IPR_EXC_ADDR] = PC;
+
+ if (fault == Pal_Fault || fault == Arithmetic_Fault /* ||
+ fault == Interrupt_Fault && !PC_PAL(regs.pc) */) {
+ // traps... skip faulting instruction
+ ipr[AlphaISA::IPR_EXC_ADDR] += 4;
+ }
+
+ if (!PC_PAL(PC))
+ AlphaISA::swap_palshadow(&regs, true);
+
+ setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] );
+ // NOTE(review): the next PC is derived from the faulting commit PC, not
+ // from the handler address just written by setPC() -- confirm intended.
+ setNextPC(PC + sizeof(MachInst));
+}
+
+// Placeholder for interrupt handling: the body is currently empty, so
+// calling this does nothing.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::processInterrupts()
+{
+ // Check for interrupts here. For now can copy the code that exists
+ // within isa_fullsys_traits.hh.
+}
+
+// swap_palshadow exchanges the PAL shadow registers with the integer
+// registers that share their logical index, for every index flagged in
+// reg_redir, and records the new shadow state in palShadowEnabled.
+// Panics if the requested state is the one already in effect.
+template <class Impl>
+void
+AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
+{
+    if (palShadowEnabled == use_shadow)
+        panic("swap_palshadow: wrong PAL shadow state");
+
+    palShadowEnabled = use_shadow;
+
+    // Will have to lookup in rename map to get physical registers, then
+    // swap.
+    for (int reg_idx = 0; reg_idx < AlphaISA::NumIntRegs; reg_idx++) {
+        // Only redirected registers have a shadow counterpart.
+        if (!reg_redir[reg_idx])
+            continue;
+
+        AlphaISA::IntReg saved = regs->intRegFile[reg_idx];
+        regs->intRegFile[reg_idx] = regs->palregs[reg_idx];
+        regs->palregs[reg_idx] = saved;
+    }
+}
+
+#endif // FULL_SYSTEM
diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh
index a80b116a8..fc86dacd7 100644
--- a/cpu/beta_cpu/alpha_impl.hh
+++ b/cpu/beta_cpu/alpha_impl.hh
@@ -3,23 +3,14 @@
#include "arch/alpha/isa_traits.hh"
-#include "cpu/beta_cpu/comm.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
#include "cpu/beta_cpu/alpha_params.hh"
-#include "cpu/beta_cpu/commit.hh"
-#include "cpu/beta_cpu/decode.hh"
-#include "cpu/beta_cpu/fetch.hh"
-#include "cpu/beta_cpu/free_list.hh"
-#include "cpu/beta_cpu/iew.hh"
-
-#include "cpu/beta_cpu/inst_queue.hh"
-#include "cpu/beta_cpu/regfile.hh"
-#include "cpu/beta_cpu/rename.hh"
-#include "cpu/beta_cpu/rename_map.hh"
-#include "cpu/beta_cpu/rob.hh"
-
+// Forward declarations.
+template <class Impl>
class AlphaDynInst;
+
+template <class Impl>
class AlphaFullCPU;
/** Implementation specific struct that defines several key things to the
@@ -42,33 +33,22 @@ struct AlphaSimpleImpl
typedef SimpleCPUPolicy<AlphaSimpleImpl> CPUPol;
/** The DynInst to be used. */
- typedef AlphaDynInst DynInst;
+ typedef AlphaDynInst<AlphaSimpleImpl> DynInst;
+
+ /** The refcounted DynInst pointer to be used. In most cases this is
+ * what should be used, and not DynInst *.
+ */
+ typedef RefCountingPtr<DynInst> DynInstPtr;
/** The FullCPU to be used. */
- typedef AlphaFullCPU FullCPU;
+ typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU;
/** The Params to be passed to each stage. */
typedef AlphaSimpleParams Params;
- /** The struct for communication between fetch and decode. */
- typedef SimpleFetchSimpleDecode<AlphaSimpleImpl> FetchStruct;
-
- /** The struct for communication between decode and rename. */
- typedef SimpleDecodeSimpleRename<AlphaSimpleImpl> DecodeStruct;
-
- /** The struct for communication between rename and IEW. */
- typedef SimpleRenameSimpleIEW<AlphaSimpleImpl> RenameStruct;
-
- /** The struct for communication between IEW and commit. */
- typedef SimpleIEWSimpleCommit<AlphaSimpleImpl> IEWStruct;
-
- /** The struct for communication within the IEW stage. */
- typedef IssueStruct<AlphaSimpleImpl> IssueStruct;
-
- /** The struct for all backwards communication. */
- typedef TimeBufStruct TimeStruct;
+ enum {
+ MaxWidth = 8
+ };
};
-
-
#endif // __ALPHA_IMPL_HH__
diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh
index b217ef8e3..92dfd35f5 100644
--- a/cpu/beta_cpu/alpha_params.hh
+++ b/cpu/beta_cpu/alpha_params.hh
@@ -1,6 +1,8 @@
#ifndef __ALPHA_SIMPLE_PARAMS_HH__
#define __ALPHA_SIMPLE_PARAMS_HH__
+#include "cpu/beta_cpu/full_cpu.hh"
+
//Forward declarations
class System;
class AlphaITB;
@@ -15,16 +17,11 @@ class MemInterface;
* defined that it can pass to all of the individual stages.
*/
-class AlphaSimpleParams
+class AlphaSimpleParams : public BaseFullCPU::Params
{
public:
- std::string name;
- int numberOfThreads;
-
#ifdef FULL_SYSTEM
- System *_system;
AlphaITB *itb; AlphaDTB *dtb;
- Tick freq;
#else
std::vector<Process *> workload;
Process *process;
@@ -33,34 +30,41 @@ class AlphaSimpleParams
FunctionalMemory *mem;
- Counter maxInstsAnyThread;
- Counter maxInstsAllThreads;
- Counter maxLoadsAnyThread;
- Counter maxLoadsAllThreads;
-
//
// Caches
//
MemInterface *icacheInterface;
MemInterface *dcacheInterface;
+ //
+ // Fetch
+ //
unsigned decodeToFetchDelay;
unsigned renameToFetchDelay;
unsigned iewToFetchDelay;
unsigned commitToFetchDelay;
unsigned fetchWidth;
+ //
+ // Decode
+ //
unsigned renameToDecodeDelay;
unsigned iewToDecodeDelay;
unsigned commitToDecodeDelay;
unsigned fetchToDecodeDelay;
unsigned decodeWidth;
+ //
+ // Rename
+ //
unsigned iewToRenameDelay;
unsigned commitToRenameDelay;
unsigned decodeToRenameDelay;
unsigned renameWidth;
+ //
+ // IEW
+ //
unsigned commitToIEWDelay;
unsigned renameToIEWDelay;
unsigned issueToExecuteDelay;
@@ -69,16 +73,39 @@ class AlphaSimpleParams
unsigned executeIntWidth;
unsigned executeFloatWidth;
+ //
+ // Commit
+ //
unsigned iewToCommitDelay;
unsigned renameToROBDelay;
unsigned commitWidth;
unsigned squashWidth;
+ //
+ // Branch predictor (BP & BTB)
+ //
+ unsigned localPredictorSize;
+ unsigned localPredictorCtrBits;
+ unsigned BTBEntries;
+ unsigned BTBTagSize;
+
+ //
+ // Load store queue
+ //
+ unsigned LQEntries;
+ unsigned SQEntries;
+
+ //
+ // Miscellaneous
+ //
unsigned numPhysIntRegs;
unsigned numPhysFloatRegs;
unsigned numIQEntries;
unsigned numROBEntries;
+ // Probably can get this from somewhere.
+ unsigned instShiftAmt;
+
bool defReg;
};
diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc
new file mode 100644
index 000000000..6de2def44
--- /dev/null
+++ b/cpu/beta_cpu/bpred_unit.cc
@@ -0,0 +1,5 @@
+
+#include "cpu/beta_cpu/bpred_unit_impl.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
+
+// Explicitly instantiate the branch prediction unit for the Alpha impl.
+// The class-key is required by the explicit instantiation syntax.
+template class DefaultBPredUnit<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh
new file mode 100644
index 000000000..71191f5b7
--- /dev/null
+++ b/cpu/beta_cpu/bpred_unit.hh
@@ -0,0 +1,51 @@
+
+#ifndef __BPRED_UNIT_HH__
+#define __BPRED_UNIT_HH__
+
+// For Addr type.
+#include "arch/alpha/isa_traits.hh"
+
+#include "cpu/beta_cpu/2bit_local_pred.hh"
+#include "cpu/beta_cpu/btb.hh"
+
+/**
+ * Basically a wrapper class to hold both the branch predictor
+ * and the BTB. Right now I'm unsure of the implementation; it would
+ * be nicer to have something closer to the CPUPolicy or the Impl where
+ * this is just typedefs, but it forces the upper level stages to be
+ * aware of the constructors of the BP and the BTB. The nicer thing
+ * to do is have this templated on the Impl, accept the usual Params
+ * object, and be able to call the constructors on the BP and BTB.
+ *
+ * Every member function below simply forwards to the contained
+ * predictor (BP) or branch target buffer (BTB).
+ */
+template<class Impl>
+class DefaultBPredUnit
+{
+ public:
+ typedef typename Impl::Params Params;
+
+ /** Constructs the BP and the BTB from the fields of the params object. */
+ DefaultBPredUnit(Params &params);
+
+ /** Forwards to BP.lookup(); presumably the predicted direction of the
+  *  branch at inst_PC (DefaultBP is defined elsewhere - confirm). */
+ bool BPLookup(Addr &inst_PC)
+ { return BP.lookup(inst_PC); }
+
+ /** Forwards to BTB.valid() for the given instruction PC. */
+ bool BTBValid(Addr &inst_PC)
+ { return BTB.valid(inst_PC); }
+
+ /** Forwards to BTB.lookup() and returns the address it reports. */
+ Addr BTBLookup(Addr &inst_PC)
+ { return BTB.lookup(inst_PC); }
+
+ /** Forwards the branch outcome (taken or not) to BP.update(). */
+ void BPUpdate(Addr &inst_PC, bool taken)
+ { BP.update(inst_PC, taken); }
+
+ /** Forwards the branch PC and its target to BTB.update(). */
+ void BTBUpdate(Addr &inst_PC, Addr &target_PC)
+ { BTB.update(inst_PC, target_PC); }
+
+ private:
+
+ /** The branch predictor. */
+ DefaultBP BP;
+
+ /** The branch target buffer. */
+ DefaultBTB BTB;
+
+};
+
+#endif // __BPRED_UNIT_HH__
diff --git a/cpu/beta_cpu/bpred_unit_impl.hh b/cpu/beta_cpu/bpred_unit_impl.hh
new file mode 100644
index 000000000..47415ce9b
--- /dev/null
+++ b/cpu/beta_cpu/bpred_unit_impl.hh
@@ -0,0 +1,13 @@
+
+#include "cpu/beta_cpu/bpred_unit.hh"
+
+// Builds the predictor and the BTB straight from the params object.  The
+// predictor receives localPredictorSize and localPredictorCtrBits, the BTB
+// receives BTBEntries and BTBTagSize, and both receive instShiftAmt.
+template<class Impl>
+DefaultBPredUnit<Impl>::DefaultBPredUnit(Params &params)
+ : BP(params.localPredictorSize,
+ params.localPredictorCtrBits,
+ params.instShiftAmt),
+ BTB(params.BTBEntries,
+ params.BTBTagSize,
+ params.instShiftAmt)
+{
+}
diff --git a/cpu/beta_cpu/btb.cc b/cpu/beta_cpu/btb.cc
new file mode 100644
index 000000000..b49f30482
--- /dev/null
+++ b/cpu/beta_cpu/btb.cc
@@ -0,0 +1,85 @@
+#include <math.h>
+
+#include "cpu/beta_cpu/btb.hh"
+#include "base/trace.hh"
+
+// Allocates the entry array and precomputes the masks and shift amounts
+// used to split an instruction PC into a table index and a tag.
+//
+// _numEntries   number of BTB entries; expected to be a power of two so
+//               that (numEntries - 1) is a usable index mask (not checked)
+// _tagBits      number of tag bits kept per entry
+// _instShiftAmt number of low PC bits to discard before indexing
+DefaultBTB::DefaultBTB(unsigned _numEntries,
+                       unsigned _tagBits,
+                       unsigned _instShiftAmt)
+    : numEntries(_numEntries),
+      tagBits(_tagBits),
+      instShiftAmt(_instShiftAmt)
+{
+    // @todo Check to make sure num_entries is valid (a power of 2)
+
+    DPRINTF(Fetch, "BTB: Creating BTB object.\n");
+
+    btb = new BTBEntry[numEntries];
+
+    // Unsigned counter: numEntries is unsigned, avoid a mixed comparison.
+    for (unsigned i = 0; i < numEntries; ++i) {
+        btb[i].valid = false;
+    }
+
+    idxMask = numEntries - 1;
+
+    // Build the mask with unsigned arithmetic, and saturate when the
+    // requested width covers the whole mask: shifting by the full width of
+    // the type (or into the sign bit of an int) is undefined.
+    const unsigned mask_width = sizeof(tagMask) * 8;
+    tagMask = (tagBits >= mask_width) ? ~0u : ((1u << tagBits) - 1);
+
+    // Number of index bits = floor(log2(numEntries)), computed with
+    // integer arithmetic so no floating point round trip is involved.
+    unsigned idx_bits = 0;
+    for (unsigned remaining = numEntries; remaining > 1; remaining >>= 1)
+        ++idx_bits;
+
+    // The tag starts right above the index bits.
+    tagShiftAmt = instShiftAmt + idx_bits;
+}
+
+// Maps an instruction PC to its slot in the entry array.
+inline
+unsigned
+DefaultBTB::getIndex(const Addr &inst_PC)
+{
+    // Discard the low instShiftAmt bits of the PC first, then keep only
+    // the bits selected by the index mask.
+    const Addr shifted_PC = inst_PC >> instShiftAmt;
+
+    return shifted_PC & idxMask;
+}
+
+// Extracts the tag of an instruction PC: the PC shifted right by
+// tagShiftAmt, limited by tagMask.
+inline
+Addr
+DefaultBTB::getTag(const Addr &inst_PC)
+{
+    const Addr upper_bits = inst_PC >> tagShiftAmt;
+
+    return upper_bits & tagMask;
+}
+
+// Reports whether the entry selected by inst_PC is marked valid and
+// carries the same tag as inst_PC.
+bool
+DefaultBTB::valid(const Addr &inst_PC)
+{
+    const BTBEntry &entry = btb[getIndex(inst_PC)];
+
+    return entry.valid && getTag(inst_PC) == entry.tag;
+}
+
+// @todo Create some sort of return struct that has both whether or not the
+// address is valid, and also the address.  For now will just use addr = 0 to
+// represent invalid entry.
+//
+// Returns the stored target for inst_PC when the selected entry is valid
+// and its tag matches; returns 0 otherwise.
+Addr
+DefaultBTB::lookup(const Addr &inst_PC)
+{
+    const BTBEntry &entry = btb[getIndex(inst_PC)];
+
+    // Miss: either nothing was ever stored here or the slot belongs to a
+    // different PC.
+    if (!entry.valid || entry.tag != getTag(inst_PC))
+        return 0;
+
+    return entry.target;
+}
+
+// Records target as the destination of the branch at inst_PC, overwriting
+// whatever the selected entry held before.
+void
+DefaultBTB::update(const Addr &inst_PC, const Addr &target)
+{
+    BTBEntry &entry = btb[getIndex(inst_PC)];
+
+    entry.tag = getTag(inst_PC);
+    entry.target = target;
+    entry.valid = true;
+}
diff --git a/cpu/beta_cpu/btb.hh b/cpu/beta_cpu/btb.hh
new file mode 100644
index 000000000..81069eabe
--- /dev/null
+++ b/cpu/beta_cpu/btb.hh
@@ -0,0 +1,52 @@
+#ifndef __BTB_HH__
+#define __BTB_HH__
+
+// For Addr type.
+#include "arch/alpha/isa_traits.hh"
+
+/**
+ * Branch target buffer with a single entry per index.  An instruction PC
+ * is split into an index (selecting the entry) and a tag (checked against
+ * the entry) to find the stored branch target.
+ *
+ * The object owns the entry array it allocates, so it releases it on
+ * destruction and cannot be copied.
+ */
+class DefaultBTB
+{
+  private:
+    /** One BTB slot: the tag and target of the last branch stored in it. */
+    struct BTBEntry
+    {
+        BTBEntry()
+            : tag(0), target(0), valid(false)
+        {
+        }
+
+        /** Tag of the branch PC stored in this slot. */
+        Addr tag;
+        /** Target address recorded for that branch. */
+        Addr target;
+        /** Whether this slot holds a recorded branch. */
+        bool valid;
+    };
+
+  public:
+    /**
+     * @param numEntries Number of entries in the table.
+     * @param tagBits Number of tag bits kept per entry.
+     * @param instShiftAmt Number of low PC bits dropped before indexing.
+     */
+    DefaultBTB(unsigned numEntries, unsigned tagBits,
+               unsigned instShiftAmt);
+
+    /** Releases the entry array allocated by the constructor. */
+    ~DefaultBTB() { delete [] btb; }
+
+    /** Returns the stored target for inst_PC, or 0 when there is none. */
+    Addr lookup(const Addr &inst_PC);
+
+    /** Returns true if a valid entry with a matching tag exists. */
+    bool valid(const Addr &inst_PC);
+
+    /** Stores target_PC as the target of the branch at inst_PC. */
+    void update(const Addr &inst_PC, const Addr &target_PC);
+
+  private:
+    // Copying would leave two objects owning (and freeing) the same array,
+    // so copy construction and assignment are declared but never defined.
+    DefaultBTB(const DefaultBTB &);
+    DefaultBTB &operator=(const DefaultBTB &);
+
+    /** Computes the table index for an instruction PC. */
+    inline unsigned getIndex(const Addr &inst_PC);
+
+    /** Computes the tag for an instruction PC. */
+    inline Addr getTag(const Addr &inst_PC);
+
+    /** The entry array, numEntries long. */
+    BTBEntry *btb;
+
+    /** Number of entries in the table. */
+    unsigned numEntries;
+
+    /** Mask applied to the shifted PC to obtain the index. */
+    unsigned idxMask;
+
+    /** Number of tag bits kept per entry. */
+    unsigned tagBits;
+
+    /** Mask applied to the shifted PC to obtain the tag. */
+    unsigned tagMask;
+
+    /** Number of low PC bits dropped before indexing. */
+    unsigned instShiftAmt;
+
+    /** Number of PC bits shifted out before the tag is extracted. */
+    unsigned tagShiftAmt;
+};
+
+#endif // __BTB_HH__
diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh
index 21a530ecf..849a6c797 100644
--- a/cpu/beta_cpu/comm.hh
+++ b/cpu/beta_cpu/comm.hh
@@ -2,6 +2,7 @@
#define __COMM_HH__
#include <stdint.h>
+#include <vector>
#include "arch/alpha/isa_traits.hh"
#include "cpu/inst_seq.hh"
@@ -10,34 +11,49 @@ using namespace std;
// Find better place to put this typedef.
typedef short int PhysRegIndex;
-// Might want to put constructors/destructors here.
template<class Impl>
struct SimpleFetchSimpleDecode {
- // Consider having a field of how many ready instructions.
- typename Impl::DynInst *insts[1];
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth + 1];
};
template<class Impl>
struct SimpleDecodeSimpleRename {
- // Consider having a field of how many ready instructions.
- typename Impl::DynInst *insts[1];
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth + 1];
};
template<class Impl>
struct SimpleRenameSimpleIEW {
- // Consider having a field of how many ready instructions.
- typename Impl::DynInst *insts[1];
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth + 1];
};
template<class Impl>
struct SimpleIEWSimpleCommit {
- // Consider having a field of how many ready instructions.
- typename Impl::DynInst *insts[1];
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth + 1];
};
template<class Impl>
struct IssueStruct {
- typename Impl::DynInst *insts[1];
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth + 1];
};
struct TimeBufStruct {
@@ -47,11 +63,9 @@ struct TimeBufStruct {
bool predIncorrect;
uint64_t branchAddr;
- //Question, is it worthwhile to have this Addr passed along
- //by each stage, or just have Fetch look it up in the proper
- //amount of cycles in the time buffer?
- //Both might actually be needed because decode can send a different
- //nextPC if the bpred was wrong.
+ bool branchMispredict;
+ bool branchTaken;
+ uint64_t mispredPC;
uint64_t nextPC;
};
@@ -72,14 +86,14 @@ struct TimeBufStruct {
struct iewComm {
bool squash;
bool stall;
- bool predIncorrect;
// Also eventually include skid buffer space.
unsigned freeIQEntries;
+ bool branchMispredict;
+ bool branchTaken;
+ uint64_t mispredPC;
uint64_t nextPC;
- // For now hardcode the type.
- // Change this to sequence number eventually.
InstSeqNum squashedSeqNum;
};
@@ -90,18 +104,31 @@ struct TimeBufStruct {
bool stall;
unsigned freeROBEntries;
+ bool branchMispredict;
+ bool branchTaken;
+ uint64_t mispredPC;
uint64_t nextPC;
// Think of better names here.
// Will need to be a variety of sizes...
// Maybe make it a vector, that way only need one object.
- vector<PhysRegIndex> freeRegs;
+ std::vector<PhysRegIndex> freeRegs;
bool robSquashing;
+
// Represents the instruction that has either been retired or
// squashed. Similar to having a single bus that broadcasts the
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
+
+ // Extra bits of information so that the LDSTQ only updates when it
+ // needs to.
+ bool commitIsStore;
+ bool commitIsLoad;
+
+ // Communication specifically to the IQ to tell the IQ that it can
+ // schedule a non-speculative instruction.
+ InstSeqNum nonSpecSeqNum;
};
commitComm commitInfo;
diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh
index 0e5a96e2a..981d9e78f 100644
--- a/cpu/beta_cpu/commit.hh
+++ b/cpu/beta_cpu/commit.hh
@@ -1,6 +1,4 @@
-// Todo: Squash properly. Have commit be able to send a squash signal
-// to previous stages; will be needed when trap() is implemented.
-// Maybe have a special method for handling interrupts/traps.
+// Todo: Maybe have a special method for handling interrupts/traps.
//
// Traps: Have IEW send a signal to commit saying that there's a trap to
// be handled. Have commit send the PC back to the fetch stage, along
@@ -17,12 +15,11 @@
#ifndef __SIMPLE_COMMIT_HH__
#define __SIMPLE_COMMIT_HH__
-//Includes: ROB, time buffer, structs, memory interface
-#include "arch/alpha/isa_traits.hh"
+//#include "arch/alpha/isa_traits.hh"
#include "base/timebuf.hh"
-#include "cpu/beta_cpu/comm.hh"
-#include "cpu/beta_cpu/rename_map.hh"
-#include "cpu/beta_cpu/rob.hh"
+//#include "cpu/beta_cpu/comm.hh"
+//#include "cpu/beta_cpu/rename_map.hh"
+//#include "cpu/beta_cpu/rob.hh"
#include "mem/memory_interface.hh"
template<class Impl>
@@ -32,14 +29,15 @@ class SimpleCommit
// Typedefs from the Impl.
typedef typename Impl::ISA ISA;
typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::Params Params;
+ typedef typename Impl::CPUPol CPUPol;
- typedef typename Impl::CPUPol::ROB ROB;
+ typedef typename CPUPol::ROB ROB;
- typedef typename Impl::TimeStruct TimeStruct;
- typedef typename Impl::IEWStruct IEWStruct;
- typedef typename Impl::RenameStruct RenameStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
+ typedef typename CPUPol::IEWStruct IEWStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
public:
// I don't believe commit can block, so it will only have two
@@ -83,7 +81,7 @@ class SimpleCommit
void commitInsts();
- bool commitHead(DynInst *head_inst, unsigned inst_num);
+ bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
void getInsts();
@@ -117,7 +115,7 @@ class SimpleCommit
FullCPU *cpu;
/** Pointer to the rename map. DO NOT USE if possible. */
- typename Impl::CPUPol::RenameMap *renameMap;
+// typename Impl::CPUPol::RenameMap *renameMap;
//Store buffer interface? Will need to move committed stores to the
//store buffer
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh
index bc8db0ce0..45b8bc7de 100644
--- a/cpu/beta_cpu/commit_impl.hh
+++ b/cpu/beta_cpu/commit_impl.hh
@@ -9,7 +9,7 @@
#include "cpu/beta_cpu/commit.hh"
#include "cpu/exetrace.hh"
-template<class Impl>
+template <class Impl>
SimpleCommit<Impl>::SimpleCommit(Params &params)
: dcacheInterface(params.dcacheInterface),
iewToCommitDelay(params.iewToCommitDelay),
@@ -21,7 +21,7 @@ SimpleCommit<Impl>::SimpleCommit(Params &params)
_status = Idle;
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr)
{
@@ -29,7 +29,7 @@ SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr)
cpu = cpu_ptr;
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
@@ -43,7 +43,7 @@ SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay);
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
@@ -54,7 +54,7 @@ SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
fromRename = renameQueue->getWire(-renameToROBDelay);
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
{
@@ -65,7 +65,7 @@ SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
fromIEW = iewQueue->getWire(-iewToCommitDelay);
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::setROB(ROB *rob_ptr)
{
@@ -73,7 +73,7 @@ SimpleCommit<Impl>::setROB(ROB *rob_ptr)
rob = rob_ptr;
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::tick()
{
@@ -106,7 +106,7 @@ SimpleCommit<Impl>::tick()
toIEW->commitInfo.freeROBEntries = rob->numFreeEntries();
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::commit()
{
@@ -154,17 +154,30 @@ SimpleCommit<Impl>::commit()
// Send back the sequence number of the squashed instruction.
toIEW->commitInfo.doneSeqNum = squashed_inst;
+
// Send back the squash signal to tell stages that they should squash.
toIEW->commitInfo.squash = true;
+
// Send back the rob squashing signal so other stages know that the
// ROB is in the process of squashing.
toIEW->commitInfo.robSquashing = true;
+
+ toIEW->commitInfo.branchMispredict =
+ robInfoFromIEW->iewInfo.branchMispredict;
+
+ toIEW->commitInfo.branchTaken =
+ robInfoFromIEW->iewInfo.branchTaken;
+
toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC;
+
+ toIEW->commitInfo.mispredPC = robInfoFromIEW->iewInfo.mispredPC;
}
if (_status != ROBSquashing) {
+ // If we're not currently squashing, then get instructions.
getInsts();
+ // Try to commit any instructions.
commitInsts();
}
@@ -183,7 +196,7 @@ SimpleCommit<Impl>::commit()
// Loop that goes through as many instructions in the ROB as possible and
// tries to commit them. The actual work for committing is done by the
// commitHead() function.
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::commitInsts()
{
@@ -195,7 +208,7 @@ SimpleCommit<Impl>::commitInsts()
// Can't commit and squash things at the same time...
////////////////////////////////////
- DynInst *head_inst = rob->readHeadInst();
+ DynInstPtr head_inst = rob->readHeadInst();
unsigned num_committed = 0;
@@ -224,12 +237,12 @@ SimpleCommit<Impl>::commitInsts()
// inst in the ROB without affecting any other stages.
rob->retireHead();
- ++num_committed;
} else {
// Increment the total number of non-speculative instructions
// executed.
// Hack for now: it really shouldn't happen until after the
- // commit is deemed to be successful.
+ // commit is deemed to be successful, but this count is needed
+ // for syscalls.
cpu->funcExeInst++;
// Try to commit the head instruction.
@@ -256,9 +269,9 @@ SimpleCommit<Impl>::commitInsts()
}
}
-template<class Impl>
+template <class Impl>
bool
-SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
+SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
{
// Make sure instruction is valid
assert(head_inst);
@@ -271,21 +284,26 @@ SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
// Also check if it's nonspeculative. Or a nop. Then it will be
// executed only when it reaches the head of the ROB. Actually
// executing a nop is a bit overkill...
- if (head_inst->isStore() ||
- head_inst->isLoad() ||
- head_inst->isNonSpeculative() ||
- head_inst->isNop()) {
- DPRINTF(Commit, "Commit: Executing a memory reference or "
- "nonspeculative instruction at commit, inst PC %#x\n",
- head_inst->PC);
- fault = head_inst->execute();
-
- // Tell CPU to tell IEW to tell IQ (nasty chain of calls) that
- // this instruction has completed. Could predicate this on
- // whether or not the instruction has a destination.
- // Slightly unrealistic, but will not really be a factor once
- // a real load/store queue is added.
- cpu->wakeDependents(head_inst);
+ if (!head_inst->isExecuted()) {
+ // Keep this number correct. We have not yet actually executed
+ // and committed this instruction.
+ cpu->funcExeInst--;
+ if (head_inst->isStore() || head_inst->isNonSpeculative()) {
+ DPRINTF(Commit, "Commit: Encountered a store or non-speculative "
+ "instruction at the head of the ROB, PC %#x.\n",
+ head_inst->readPC());
+
+ toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum;
+
+ // Change the instruction so it won't try to commit again until
+ // it is executed.
+ head_inst->clearCanCommit();
+
+ return false;
+ } else {
+ panic("Commit: Trying to commit un-executed instruction "
+ "of unknown type!\n");
+ }
}
// Check if memory access was successful.
@@ -320,8 +338,10 @@ SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
#ifdef FULL_SYSTEM
cpu->trap(fault);
#else // !FULL_SYSTEM
- panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
- head_inst->PC);
+ if (!head_inst->isNop()) {
+ panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
+ head_inst->PC);
+ }
#endif // FULL_SYSTEM
}
@@ -333,8 +353,8 @@ SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
return false;
}
- //If it's a branch, then send back branch prediction update info
- //to the fetch stage.
+ // If it's a branch, then send back branch prediction update info
+ // to the fetch stage.
// This should be handled in the iew stage if a mispredict happens...
#if 0
if (head_inst->isControl()) {
@@ -358,6 +378,15 @@ SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
}
#endif
+ // Explicit communication back to the LDSTQ that a load has been committed
+ // and can be removed from the LDSTQ. Stores don't need this because
+ // the LDSTQ will already have been told that a store has reached the head
+ // of the ROB. Consider including communication if it's a store as well
+ // to keep things orthogonal.
+ if (head_inst->isLoad()) {
+ toIEW->commitInfo.commitIsLoad = true;
+ }
+
// Now that the instruction is going to be committed, finalize its
// trace data.
if (head_inst->traceData) {
@@ -371,7 +400,7 @@ SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
return true;
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::getInsts()
{
@@ -382,24 +411,33 @@ SimpleCommit<Impl>::getInsts()
// Read any issued instructions and place them into the ROB. Do this
// prior to squashing to avoid having instructions in the ROB that
// don't get squashed properly.
+ int insts_to_process = min((int)renameWidth, fromRename->size);
+
for (int inst_num = 0;
- fromRename->insts[inst_num] != NULL && inst_num < renameWidth;
+ inst_num < insts_to_process;
++inst_num)
{
- DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n",
- fromRename->insts[inst_num]->readPC());
- rob->insertInst(fromRename->insts[inst_num]);
+ if (!fromRename->insts[inst_num]->isSquashed()) {
+ DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n",
+ fromRename->insts[inst_num]->readPC());
+ rob->insertInst(fromRename->insts[inst_num]);
+ } else {
+ DPRINTF(Commit, "Commit: Instruction %i PC %#x was "
+ "squashed, skipping.\n",
+ fromRename->insts[inst_num]->seqNum,
+ fromRename->insts[inst_num]->readPC());
+ }
}
}
-template<class Impl>
+template <class Impl>
void
SimpleCommit<Impl>::markCompletedInsts()
{
// Grab completed insts out of the IEW instruction queue, and mark
// instructions completed within the ROB.
for (int inst_num = 0;
- fromIEW->insts[inst_num] != NULL && inst_num < iewWidth;
+ inst_num < iewWidth && fromIEW->insts[inst_num];
++inst_num)
{
DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n",
@@ -411,7 +449,7 @@ SimpleCommit<Impl>::markCompletedInsts()
}
}
-template<class Impl>
+template <class Impl>
uint64_t
SimpleCommit<Impl>::readCommitPC()
{
diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh
index 676334249..ec8460b77 100644
--- a/cpu/beta_cpu/cpu_policy.hh
+++ b/cpu/beta_cpu/cpu_policy.hh
@@ -1,32 +1,60 @@
#ifndef __CPU_POLICY_HH__
#define __CPU_POLICY_HH__
+#include "cpu/beta_cpu/bpred_unit.hh"
+#include "cpu/beta_cpu/inst_queue.hh"
+#include "cpu/beta_cpu/regfile.hh"
+#include "cpu/beta_cpu/free_list.hh"
+#include "cpu/beta_cpu/rename_map.hh"
+#include "cpu/beta_cpu/rob.hh"
+#include "cpu/beta_cpu/store_set.hh"
+#include "cpu/beta_cpu/mem_dep_unit.hh"
+#include "cpu/beta_cpu/ldstq.hh"
+
#include "cpu/beta_cpu/fetch.hh"
#include "cpu/beta_cpu/decode.hh"
#include "cpu/beta_cpu/rename.hh"
#include "cpu/beta_cpu/iew.hh"
#include "cpu/beta_cpu/commit.hh"
-#include "cpu/beta_cpu/inst_queue.hh"
-#include "cpu/beta_cpu/regfile.hh"
-#include "cpu/beta_cpu/free_list.hh"
-#include "cpu/beta_cpu/rename_map.hh"
-#include "cpu/beta_cpu/rob.hh"
+#include "cpu/beta_cpu/comm.hh"
template<class Impl>
struct SimpleCPUPolicy
{
+ typedef DefaultBPredUnit<Impl> BPredUnit;
typedef PhysRegFile<Impl> RegFile;
typedef SimpleFreeList FreeList;
typedef SimpleRenameMap RenameMap;
typedef ROB<Impl> ROB;
typedef InstructionQueue<Impl> IQ;
+ typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
+ typedef LDSTQ<Impl> LDSTQ;
typedef SimpleFetch<Impl> Fetch;
typedef SimpleDecode<Impl> Decode;
typedef SimpleRename<Impl> Rename;
typedef SimpleIEW<Impl, IQ> IEW;
typedef SimpleCommit<Impl> Commit;
+
+ /** The struct for communication between fetch and decode. */
+ typedef SimpleFetchSimpleDecode<Impl> FetchStruct;
+
+ /** The struct for communication between decode and rename. */
+ typedef SimpleDecodeSimpleRename<Impl> DecodeStruct;
+
+ /** The struct for communication between rename and IEW. */
+ typedef SimpleRenameSimpleIEW<Impl> RenameStruct;
+
+ /** The struct for communication between IEW and commit. */
+ typedef SimpleIEWSimpleCommit<Impl> IEWStruct;
+
+ /** The struct for communication within the IEW stage. */
+ typedef IssueStruct<Impl> IssueStruct;
+
+ /** The struct for all backwards communication. */
+ typedef TimeBufStruct TimeStruct;
+
};
#endif //__CPU_POLICY_HH__
diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh
index c41955dcb..be88a4b36 100644
--- a/cpu/beta_cpu/decode.hh
+++ b/cpu/beta_cpu/decode.hh
@@ -10,11 +10,7 @@
#include <queue>
-//Will want to include: time buffer, structs,
#include "base/timebuf.hh"
-#include "cpu/beta_cpu/comm.hh"
-
-using namespace std;
template<class Impl>
class SimpleDecode
@@ -22,13 +18,15 @@ class SimpleDecode
private:
// Typedefs from the Impl.
typedef typename Impl::ISA ISA;
- typedef typename Impl::DynInst DynInst;
typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::Params Params;
+ typedef typename Impl::CPUPol CPUPol;
- typedef typename Impl::FetchStruct FetchStruct;
- typedef typename Impl::DecodeStruct DecodeStruct;
- typedef typename Impl::TimeStruct TimeStruct;
+ // Typedefs from the CPU policy.
+ typedef typename CPUPol::FetchStruct FetchStruct;
+ typedef typename CPUPol::DecodeStruct DecodeStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
// Typedefs from the ISA.
typedef typename ISA::Addr Addr;
@@ -71,7 +69,7 @@ class SimpleDecode
inline void unblock();
- void squash(DynInst *inst);
+ void squash(DynInstPtr &inst);
// Interfaces to objects outside of decode.
/** CPU interface. */
@@ -106,7 +104,7 @@ class SimpleDecode
typename TimeBuffer<FetchStruct>::wire fromFetch;
/** Skid buffer between fetch and decode. */
- queue<FetchStruct> skidBuffer;
+ std::queue<FetchStruct> skidBuffer;
private:
//Consider making these unsigned to avoid any confusion.
@@ -124,6 +122,12 @@ class SimpleDecode
/** The width of decode, in instructions. */
unsigned decodeWidth;
+
+ /** The instruction that decode is currently on. It needs to have
+ * persistent state so that when a stall occurs in the middle of a
+ * group of instructions, it can restart at the proper instruction.
+ */
+ unsigned numInst;
};
#endif // __SIMPLE_DECODE_HH__
diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh
index ecf19b8ea..d0f46eaa5 100644
--- a/cpu/beta_cpu/decode_impl.hh
+++ b/cpu/beta_cpu/decode_impl.hh
@@ -9,7 +9,8 @@ SimpleDecode<Impl>::SimpleDecode(Params &params)
iewToDecodeDelay(params.iewToDecodeDelay),
commitToDecodeDelay(params.commitToDecodeDelay),
fetchToDecodeDelay(params.fetchToDecodeDelay),
- decodeWidth(params.decodeWidth)
+ decodeWidth(params.decodeWidth),
+ numInst(0)
{
DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth);
_status = Idle;
@@ -103,7 +104,7 @@ SimpleDecode<Impl>::unblock()
// was predicted incorrectly.
template<class Impl>
void
-SimpleDecode<Impl>::squash(DynInst *inst)
+SimpleDecode<Impl>::squash(DynInstPtr &inst)
{
DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction "
"detected at decode.\n");
@@ -163,16 +164,22 @@ SimpleDecode<Impl>::tick()
// buffer were used. Remove those instructions and handle
// the rest of unblocking.
if (_status == Unblocking) {
+ if (fromFetch->size > 0) {
+ // Add the current inputs to the skid buffer so they can be
+ // reprocessed when this stage unblocks.
+ skidBuffer.push(*fromFetch);
+ }
+
unblock();
}
} else if (_status == Blocked) {
- if (fromFetch->insts[0] != NULL) {
+ if (fromFetch->size > 0) {
block();
}
if (!fromRename->renameInfo.stall &&
- !fromIEW->iewInfo.stall &&
- !fromCommit->commitInfo.stall) {
+ !fromIEW->iewInfo.stall &&
+ !fromCommit->commitInfo.stall) {
DPRINTF(Decode, "Decode: Stall signals cleared, going to "
"unblock.\n");
_status = Unblocking;
@@ -204,9 +211,7 @@ void
SimpleDecode<Impl>::decode()
{
// Check time buffer if being told to squash.
- if (/* fromRename->renameInfo.squash || */
- /* fromIEW->iewInfo.squash || */
- fromCommit->commitInfo.squash) {
+ if (fromCommit->commitInfo.squash) {
squash();
return;
}
@@ -223,20 +228,22 @@ SimpleDecode<Impl>::decode()
// Check fetch queue to see if instructions are available.
// If no available instructions, do nothing, unless this stage is
// currently unblocking.
- if (fromFetch->insts[0] == NULL && _status != Unblocking) {
+ if (!fromFetch->insts[0] && _status != Unblocking) {
DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
// Should I change the status to idle?
return;
}
- DynInst *inst;
+ DynInstPtr inst;
+
// Instead have a class member variable that records which instruction
// was the last one that was ended on. At the tick() stage, it can
// check if that's equal to 0. If not, then don't pop stuff off.
- unsigned num_inst = 0;
- bool insts_available = _status == Unblocking ?
- skidBuffer.front().insts[num_inst] != NULL :
- fromFetch->insts[num_inst] != NULL;
+ unsigned to_rename_index = 0;
+
+ int insts_available = _status == Unblocking ?
+ skidBuffer.front().size :
+ fromFetch->size;
// Debug block...
#if 0
@@ -247,7 +254,7 @@ SimpleDecode<Impl>::decode()
DPRINTF(Decode, "Decode: No instructions available, skid buffer "
"empty.\n");
} else if (_status != Unblocking &&
- fromFetch->insts[0] == NULL) {
+ !fromFetch->insts[0]) {
DPRINTF(Decode, "Decode: No instructions available, fetch queue "
"empty.\n");
} else {
@@ -262,26 +269,39 @@ SimpleDecode<Impl>::decode()
// should be computed here. However in this simple model all
// computation will take place at execute. Hence doneTargCalc()
// will always be false.
- while (num_inst < decodeWidth &&
- insts_available)
+ while (insts_available > 0)
{
DPRINTF(Decode, "Decode: Sending instruction to rename.\n");
// Might create some sort of accessor to get an instruction
// on a per thread basis. Or might be faster to just get
// a pointer to an array or list of instructions and use that
// within this code.
- inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] :
- fromFetch->insts[num_inst];
+ inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
+ fromFetch->insts[numInst];
+
DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n",
- inst, inst->readPC());
+ inst->seqNum, inst->readPC());
+
+ if (inst->isSquashed()) {
+ DPRINTF(Decode, "Decode: Instruction %i with PC %#x is "
+ "squashed, skipping.\n",
+ inst->seqNum, inst->readPC());
+
+ ++numInst;
+ --insts_available;
+
+ continue;
+ }
// This current instruction is valid, so add it into the decode
// queue. The next instruction may not be valid, so check to
// see if branches were predicted correctly.
- toRename->insts[num_inst] = inst;
+ toRename->insts[to_rename_index] = inst;
+
+ ++(toRename->size);
// Ensure that if it was predicted as a branch, it really is a
- // branch. This case should never happen in this model.
+ // branch.
if (inst->predTaken() && !inst->isControl()) {
panic("Instruction predicted as a branch!");
@@ -306,20 +326,19 @@ SimpleDecode<Impl>::decode()
// them as ready to issue at any time. Not sure if this check
// should exist here or at a later stage; however it doesn't matter
// too much for function correctness.
+ // Isn't this handled by the inst queue?
if (inst->numSrcRegs() == 0) {
inst->setCanIssue();
}
// Increment which instruction we're looking at.
- ++num_inst;
-
- // Check whether or not there are instructions available.
- // Either need to check within the skid buffer, or the fetch
- // queue, depending if this stage is unblocking or not.
- insts_available = _status == Unblocking ?
- skidBuffer.front().insts[num_inst] == NULL :
- fromFetch->insts[num_inst] == NULL;
+ ++numInst;
+ ++to_rename_index;
+
+ --insts_available;
}
+
+ numInst = 0;
}
#endif // __SIMPLE_DECODE_CC__
diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh
index 5717c65ac..e59a9df7f 100644
--- a/cpu/beta_cpu/fetch.hh
+++ b/cpu/beta_cpu/fetch.hh
@@ -13,16 +13,12 @@
#include "base/timebuf.hh"
#include "sim/eventq.hh"
#include "cpu/pc_event.hh"
-#include "cpu/beta_cpu/comm.hh"
#include "mem/mem_interface.hh"
-using namespace std;
-
/**
* SimpleFetch class to fetch a single instruction each cycle. SimpleFetch
* will stall if there's an Icache miss, but otherwise assumes a one cycle
- * Icache hit. This will be replaced with a more fleshed out class in the
- * future.
+ * Icache hit.
*/
template <class Impl>
@@ -31,12 +27,15 @@ class SimpleFetch
public:
/** Typedefs from Impl. */
typedef typename Impl::ISA ISA;
+ typedef typename Impl::CPUPol CPUPol;
typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::FullCPU FullCPU;
typedef typename Impl::Params Params;
- typedef typename Impl::FetchStruct FetchStruct;
- typedef typename Impl::TimeStruct TimeStruct;
+ typedef typename CPUPol::BPredUnit BPredUnit;
+ typedef typename CPUPol::FetchStruct FetchStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
/** Typedefs from ISA. */
typedef typename ISA::MachInst MachInst;
@@ -76,6 +75,17 @@ class SimpleFetch
// Figure out PC vs next PC and how it should be updated
void squash(Addr newPC);
+ private:
+ /**
+ * Looks up in the branch predictor to see if the next PC should be
+ * either next PC+=MachInst or a branch target.
+ * @param next_PC Next PC variable passed in by reference. It is
+ * expected to be set to the current PC; it will be updated with what
+ * the next PC will be.
+ * @return Whether or not a branch was predicted as taken.
+ */
+ bool lookupAndUpdateNextPC(Addr &next_PC);
+
public:
class CacheCompletionEvent : public Event
{
@@ -110,8 +120,6 @@ class SimpleFetch
/** Wire to get commit's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
- // Will probably have this sit in the FullCPU and just pass a pointr in.
- // Simplifies the constructors of all stages.
/** Internal fetch instruction queue. */
TimeBuffer<FetchStruct> *fetchQueue;
@@ -122,6 +130,9 @@ class SimpleFetch
/** Icache interface. */
MemInterface *icacheInterface;
+ /** Branch predictor unit (direction + BTB) used to pick the next PC. */
+ BPredUnit branchPred;
+
/** Memory request used to access cache. */
MemReqPtr memReq;
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
index 918d2dad2..93f7bf6d2 100644
--- a/cpu/beta_cpu/fetch_impl.hh
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -1,7 +1,5 @@
-// Todo: Rewrite this. Add in branch prediction. Fix up if squashing comes
-// from decode; only the correct instructions should be killed. This will
-// probably require changing the CPU's instList functions to take a seqNum
-// instead of a dyninst. With probe path, should be able to specify
+// Todo: Add in branch prediction. With probe path, should
+// be able to specify
// size of data to fetch. Will be able to get full cache line.
// Remove this later.
@@ -41,6 +39,7 @@ template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
: cacheCompletionEvent(this),
icacheInterface(params.icacheInterface),
+ branchPred(params),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
iewToFetchDelay(params.iewToFetchDelay),
@@ -66,7 +65,7 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
// Create mask to get rid of offset bits.
- cacheBlockMask = ~((int)log2(blkSize) - 1);
+ cacheBlockMask = (blkSize - 1);
// Get the size of an instruction.
instSize = sizeof(MachInst);
@@ -123,24 +122,59 @@ SimpleFetch<Impl>::processCacheCompletion()
_status = IcacheMissComplete;
}
-// Note that in the SimpleFetch<>, will most likely have to provide the
-// template parameters to BP and BTB.
+template<class Impl>
+bool
+SimpleFetch<Impl>::lookupAndUpdateNextPC(Addr &next_PC)
+{
+#if 1
+ // Predict the next fetch PC; a taken prediction needs a valid BTB target.
+ bool predict_taken = branchPred.BPLookup(next_PC);
+ Addr predict_target;
+
+ DPRINTF(Fetch, "Fetch: Branch predictor predicts taken? %i\n",
+ predict_taken);
+
+ if (branchPred.BTBValid(next_PC)) {
+ predict_target = branchPred.BTBLookup(next_PC);
+ DPRINTF(Fetch, "Fetch: BTB target is %#x.\n", predict_target);
+ } else {
+ predict_taken = false;
+ DPRINTF(Fetch, "Fetch: BTB does not have a valid entry.\n");
+ }
+
+ // Fall through by one instruction if not taken; else use the BTB target
+ // (predict_target is only read here, after the valid BTB lookup set it).
+ if (!predict_taken) {
+ next_PC = next_PC + instSize;
+ return false;
+ } else {
+ next_PC = predict_target;
+ return true;
+ }
+#endif
+
+#if 0
+ next_PC = next_PC + instSize;
+ return false;
+#endif
+}
+
template<class Impl>
void
SimpleFetch<Impl>::squash(Addr new_PC)
{
DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
+
cpu->setNextPC(new_PC + instSize);
cpu->setPC(new_PC);
- _status = Squashing;
- // Clear out the instructions that are no longer valid.
- // Actually maybe slightly unrealistic to kill instructions that are
- // in flight like that between stages. Perhaps just have next
- // stage ignore those instructions or something. In the cycle where it's
- // returning from squashing, the other stages can just ignore the inputs
- // for that cycle.
+ // Clear an outstanding icache miss; check before _status is overwritten.
+ if (_status == IcacheMissStall && icacheInterface) {
+ // @todo: Use an actual thread number here.
+ icacheInterface->squash(0);
+ }
+ _status = Squashing;
// Tell the CPU to remove any instructions that aren't currently
// in the ROB (instructions in flight that were killed).
@@ -151,25 +185,27 @@ template<class Impl>
void
SimpleFetch<Impl>::tick()
{
-#if 0
+#if 1
+ // Check squash signals from commit.
if (fromCommit->commitInfo.squash) {
DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
"from commit.\n");
// In any case, squash.
squash(fromCommit->commitInfo.nextPC);
- return;
- }
- if (fromDecode->decodeInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from decode.\n");
+ // Also check if there's a mispredict that happened.
+ if (fromCommit->commitInfo.branchMispredict) {
+ branchPred.BPUpdate(fromCommit->commitInfo.mispredPC,
+ fromCommit->commitInfo.branchTaken);
+ branchPred.BTBUpdate(fromCommit->commitInfo.mispredPC,
+ fromCommit->commitInfo.nextPC);
+ }
- // Squash unless we're already squashing?
- squash(fromDecode->decodeInfo.nextPC);
return;
}
+ // Check ROB squash signals from commit.
if (fromCommit->commitInfo.robSquashing) {
DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
@@ -178,11 +214,36 @@ SimpleFetch<Impl>::tick()
return;
}
+ // Check squash signals from decode.
+ if (fromDecode->decodeInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from decode.\n");
+
+ // Update the branch predictor from decode's own mispredict info.
+ if (fromDecode->decodeInfo.branchMispredict) {
+ branchPred.BPUpdate(fromDecode->decodeInfo.mispredPC,
+ fromDecode->decodeInfo.branchTaken);
+ branchPred.BTBUpdate(fromDecode->decodeInfo.mispredPC,
+ fromDecode->decodeInfo.nextPC);
+ }
+
+ if (_status != Squashing) {
+ // Squash unless we're already squashing?
+ squash(fromDecode->decodeInfo.nextPC);
+ return;
+ }
+ }
+
+
+
+ // Check if any of the stall signals are high.
if (fromDecode->decodeInfo.stall ||
fromRename->renameInfo.stall ||
fromIEW->iewInfo.stall ||
fromCommit->commitInfo.stall)
{
+ // Block stage, regardless of current status.
+
DPRINTF(Fetch, "Fetch: Stalling stage.\n");
DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
"Commit: %i\n",
@@ -190,10 +251,36 @@ SimpleFetch<Impl>::tick()
fromRename->renameInfo.stall,
fromIEW->iewInfo.stall,
fromCommit->commitInfo.stall);
- // What to do if we're already in an icache stall?
+
+ _status = Blocked;
+ return;
+ } else if (_status == Blocked) {
+ // Unblock stage if status is currently blocked and none of the
+ // stall signals are being held high.
+ _status = Running;
+
+ return;
+ }
+
+ // If fetch has reached this point, then there are no squash signals
+ // still being held high. Check if fetch is in the squashing state;
+ // if so, fetch can switch to running.
+ // Similarly, there are no blocked signals still being held high.
+ // Check if fetch is in the blocked state; if so, fetch can switch to
+ // running.
+ if (_status == Squashing) {
+ DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");
+
+ // Switch status to running
+ _status = Running;
+ } else if (_status != IcacheMissStall) {
+ DPRINTF(Fetch, "Fetch: Running stage.\n");
+
+ fetch();
}
#endif
+#if 0
if (_status != Blocked &&
_status != Squashing &&
_status != IcacheMissStall) {
@@ -253,7 +340,7 @@ SimpleFetch<Impl>::tick()
DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
}
}
-
+#endif
}
template<class Impl>
@@ -261,54 +348,9 @@ void
SimpleFetch<Impl>::fetch()
{
//////////////////////////////////////////
- // Check backwards communication
- //////////////////////////////////////////
-
- // If branch prediction is incorrect, squash any instructions,
- // update PC, and do not fetch anything this cycle.
-
- // Might want to put all the PC changing stuff in one area.
- // Normally should also check here to see if there is branch
- // misprediction info to update with.
- if (fromCommit->commitInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from commit.\n");
- squash(fromCommit->commitInfo.nextPC);
- return;
- } else if (fromDecode->decodeInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from decode.\n");
- squash(fromDecode->decodeInfo.nextPC);
- return;
- } else if (fromCommit->commitInfo.robSquashing) {
- DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
- _status = Squashing;
- return;
- }
-
- // If being told to stall, do nothing.
- if (fromDecode->decodeInfo.stall ||
- fromRename->renameInfo.stall ||
- fromIEW->iewInfo.stall ||
- fromCommit->commitInfo.stall)
- {
- DPRINTF(Fetch, "Fetch: Stalling stage.\n");
- DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
- "Commit: %i\n",
- fromDecode->decodeInfo.stall,
- fromRename->renameInfo.stall,
- fromIEW->iewInfo.stall,
- fromCommit->commitInfo.stall);
- _status = Blocked;
- return;
- }
-
- //////////////////////////////////////////
// Start actual fetch
//////////////////////////////////////////
- // If nothing else outstanding, attempt to read instructions.
-
#ifdef FULL_SYSTEM
// Flag to say whether or not address is physical addr.
unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
@@ -317,13 +359,14 @@ SimpleFetch<Impl>::fetch()
#endif // FULL_SYSTEM
// The current PC.
- Addr PC = cpu->readPC();
+ Addr fetch_PC = cpu->readPC();
// Fault code for memory access.
Fault fault = No_Fault;
// If returning from the delay of a cache miss, then update the status
- // to running, otherwise do the cache access.
+ // to running, otherwise do the cache access. Possibly move this up
+ // to tick() function.
if (_status == IcacheMissComplete) {
DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");
@@ -334,7 +377,7 @@ SimpleFetch<Impl>::fetch()
} else {
DPRINTF(Fetch, "Fetch: Attempting to translate and read "
"instruction, starting at PC %08p.\n",
- PC);
+ fetch_PC);
// Otherwise check if the instruction exists within the cache.
// If it does, then proceed on to read the instruction and the rest
@@ -347,7 +390,7 @@ SimpleFetch<Impl>::fetch()
// Setup the memReq to do a read of the first isntruction's address.
// Set the appropriate read size and flags as well.
memReq->cmd = Read;
- memReq->reset(PC, instSize, flags);
+ memReq->reset(fetch_PC, instSize, flags);
// Translate the instruction request.
// Should this function be
@@ -401,7 +444,7 @@ SimpleFetch<Impl>::fetch()
// Probably have a status on a per thread basis so each thread can
// block independently and be woken up independently.
- Addr next_PC = 0;
+ Addr next_PC = fetch_PC;
InstSeqNum inst_seq;
// If the read of the first instruction was successful, then grab the
@@ -410,6 +453,10 @@ SimpleFetch<Impl>::fetch()
if (fault == No_Fault) {
DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
+ //////////////////////////
+ // Fetch first instruction
+ //////////////////////////
+
// Need to keep track of whether or not a predicted branch
// ended this fetch block.
bool predicted_branch = false;
@@ -420,12 +467,17 @@ SimpleFetch<Impl>::fetch()
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
+ // Update the next PC; it either is PC+sizeof(MachInst), or
+ // branch_target. Check whether or not a branch was taken.
+ predicted_branch = lookupAndUpdateNextPC(next_PC);
+
// Because the first instruction was already fetched, create the
// DynInst and put it into the queue to decode.
- DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq,
- cpu);
+ DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
+ inst_seq, cpu);
+
DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
- instruction, instruction->readPC());
+ inst_seq, instruction->readPC());
DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
OPCODE(inst));
@@ -440,13 +492,17 @@ SimpleFetch<Impl>::fetch()
// that heads to decode.
toDecode->insts[0] = instruction;
- // Now update the PC to fetch the next instruction in the cache
- // line.
- PC = PC + instSize;
+ toDecode->size++;
+
+ fetch_PC = next_PC;
+
+ //////////////////////////
+ // Fetch other instructions
+ //////////////////////////
// Obtain the index into the cache line by getting only the low
- // order bits.
- int line_index = PC & cacheBlockMask;
+ // order bits. Will need to do shifting as well.
+ int line_index = fetch_PC & cacheBlockMask;
// Take instructions and put them into the queue heading to decode.
// Then read the next instruction in the cache line. Continue
@@ -461,12 +517,14 @@ SimpleFetch<Impl>::fetch()
// instructions, which can then be used to get all the instructions
// needed. Figure out if I can roll it back into one loop.
for (int fetched = 1;
- line_index < blkSize && fetched < fetchWidth;
+ line_index < blkSize &&
+ fetched < fetchWidth &&
+ !predicted_branch;
line_index+=instSize, ++fetched)
{
// Reset the mem request to setup the read of the next
// instruction.
- memReq->reset(PC, instSize, flags);
+ memReq->reset(fetch_PC, instSize, flags);
// Translate the instruction request.
fault = cpu->translateInstReq(memReq);
@@ -485,16 +543,24 @@ SimpleFetch<Impl>::fetch()
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
+ predicted_branch = lookupAndUpdateNextPC(next_PC);
+
// Create the actual DynInst. Parameters are:
// DynInst(instruction, PC, predicted PC, CPU pointer).
// Because this simple model has no branch prediction, the
// predicted PC will simply be PC+sizeof(MachInst).
// Update to actually use a branch predictor to predict the
// target in the future.
- DynInst *instruction = new DynInst(inst, PC, PC+instSize,
- inst_seq, cpu);
+ DynInstPtr instruction =
+ new DynInst(inst, fetch_PC, next_PC, inst_seq, cpu);
+
+ instruction->traceData =
+ Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
+ instruction->staticInst,
+ instruction->readPC(), 0);
+
DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
- instruction, instruction->readPC());
+ inst_seq, instruction->readPC());
DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
OPCODE(inst));
@@ -504,20 +570,15 @@ SimpleFetch<Impl>::fetch()
// that heads to decode.
toDecode->insts[fetched] = instruction;
+ toDecode->size++;
+
// Might want to keep track of various stats.
// numInstsFetched++;
- // Now update the PC to fetch the next instruction in the cache
- // line.
- PC = PC + instSize;
+ // Update the PC with the next PC.
+ fetch_PC = next_PC;
}
- // If no branches predicted taken, then increment PC with
- // fall-through path. This simple model always predicts not
- // taken.
- if (!predicted_branch) {
- next_PC = PC;
- }
}
// Now that fetching is completed, update the PC to signify what the next
@@ -544,10 +605,10 @@ SimpleFetch<Impl>::fetch()
_status = Blocked;
#ifdef FULL_SYSTEM
- // Trap will probably need a pointer to the CPU to do accessing.
- // Or an exec context. --Write ProxyExecContext eventually.
- // Avoid using this for now as the xc really shouldn't be in here.
- cpu->trap(fault);
+// cpu->trap(fault);
+ // Send a signal to the ROB indicating that there's a trap from the
+ // fetch stage that needs to be handled. Need to indicate that
+ // there's a fault, and the fault type.
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
diff --git a/cpu/beta_cpu/free_list.cc b/cpu/beta_cpu/free_list.cc
index 006bf4bf7..542b87471 100644
--- a/cpu/beta_cpu/free_list.cc
+++ b/cpu/beta_cpu/free_list.cc
@@ -1,3 +1,5 @@
+#include "base/trace.hh"
+
#include "cpu/beta_cpu/free_list.hh"
SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs,
@@ -10,6 +12,16 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs,
numPhysicalFloatRegs(_numPhysicalFloatRegs),
numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs)
{
+ DPRINTF(FreeList, "FreeList: Creating new free list object.\n");
+
+ // DEBUG stuff.
+ freeIntRegsScoreboard.resize(numPhysicalIntRegs);
+
+ freeFloatRegsScoreboard.resize(numPhysicalRegs);
+
+ for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) {
+ freeIntRegsScoreboard[i] = 0;
+ }
// Put all of the extra physical registers onto the free list. This
// means excluding all of the base logical registers.
@@ -17,6 +29,14 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs,
i < numPhysicalIntRegs; ++i)
{
freeIntRegs.push(i);
+
+ freeIntRegsScoreboard[i] = 1;
+ }
+
+ for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs;
+ ++i)
+ {
+ freeFloatRegsScoreboard[i] = 0;
}
// Put all of the extra physical registers onto the free list. This
@@ -26,8 +46,9 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs,
for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs;
i < numPhysicalRegs; ++i)
{
- cprintf("Free List: Adding register %i to float list.\n", i);
freeFloatRegs.push(i);
+
+ freeFloatRegsScoreboard[i] = 1;
}
}
diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh
index 8521ad94c..0d2b2c421 100644
--- a/cpu/beta_cpu/free_list.hh
+++ b/cpu/beta_cpu/free_list.hh
@@ -8,8 +8,6 @@
#include "cpu/beta_cpu/comm.hh"
#include "base/trace.hh"
-using namespace std;
-
// Question: Do I even need the number of logical registers?
// How to avoid freeing registers instantly? Same with ROB entries.
@@ -33,10 +31,10 @@ class SimpleFreeList
private:
/** The list of free integer registers. */
- queue<PhysRegIndex> freeIntRegs;
+ std::queue<PhysRegIndex> freeIntRegs;
/** The list of free floating point registers. */
- queue<PhysRegIndex> freeFloatRegs;
+ std::queue<PhysRegIndex> freeFloatRegs;
/** Number of logical integer registers. */
int numLogicalIntRegs;
@@ -53,6 +51,11 @@ class SimpleFreeList
/** Total number of physical registers. */
int numPhysicalRegs;
+ /** DEBUG stuff below. */
+ std::vector<int> freeIntRegsScoreboard;
+
+ std::vector<bool> freeFloatRegsScoreboard;
+
public:
SimpleFreeList(unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
@@ -94,6 +97,10 @@ SimpleFreeList::getIntReg()
freeIntRegs.pop();
+ // DEBUG
+ assert(freeIntRegsScoreboard[free_reg]);
+ freeIntRegsScoreboard[free_reg] = 0;
+
return(free_reg);
}
@@ -109,6 +116,10 @@ SimpleFreeList::getFloatReg()
freeFloatRegs.pop();
+ // DEBUG
+ assert(freeFloatRegsScoreboard[free_reg]);
+ freeFloatRegsScoreboard[free_reg] = 0;
+
return(free_reg);
}
@@ -120,8 +131,16 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg)
//already in there. A bit vector or something similar would be useful.
if (freed_reg < numPhysicalIntRegs) {
freeIntRegs.push(freed_reg);
+
+ // DEBUG
+ assert(freeIntRegsScoreboard[freed_reg] == false);
+ freeIntRegsScoreboard[freed_reg] = 1;
} else if (freed_reg < numPhysicalRegs) {
freeFloatRegs.push(freed_reg);
+
+ // DEBUG
+ assert(freeFloatRegsScoreboard[freed_reg] == false);
+ freeFloatRegsScoreboard[freed_reg] = 1;
}
}
@@ -130,6 +149,10 @@ SimpleFreeList::addIntReg(PhysRegIndex freed_reg)
{
DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg);
+ // DEBUG
+ assert(!freeIntRegsScoreboard[freed_reg]);
+ freeIntRegsScoreboard[freed_reg] = 1;
+
//Might want to add in a check for whether or not this register is
//already in there. A bit vector or something similar would be useful.
freeIntRegs.push(freed_reg);
@@ -140,6 +163,10 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
{
DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg);
+ // DEBUG
+ assert(!freeFloatRegsScoreboard[freed_reg]);
+ freeFloatRegsScoreboard[freed_reg] = 1;
+
//Might want to add in a check for whether or not this register is
//already in there. A bit vector or something similar would be useful.
freeFloatRegs.push(freed_reg);
diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc
index 6fbf5d69a..abeb4cb87 100644
--- a/cpu/beta_cpu/full_cpu.cc
+++ b/cpu/beta_cpu/full_cpu.cc
@@ -16,29 +16,18 @@
using namespace std;
#ifdef FULL_SYSTEM
-BaseFullCPU::BaseFullCPU(const std::string &_name,
- int number_of_threads,
- Counter max_insts_any_thread,
- Counter max_insts_all_threads,
- Counter max_loads_any_thread,
- Counter max_loads_all_threads,
- System *_system, Tick freq)
- : BaseCPU(_name, number_of_threads,
- max_insts_any_thread, max_insts_all_threads,
- max_loads_any_thread, max_loads_all_threads,
- _system, freq)
+BaseFullCPU::BaseFullCPU(Params &params)
+ : BaseCPU(params.name, params.numberOfThreads,
+ params.maxInstsAnyThread, params.maxInstsAllThreads,
+ params.maxLoadsAnyThread, params.maxLoadsAllThreads,
+ params._system, params.freq)
{
}
#else
-BaseFullCPU::BaseFullCPU(const std::string &_name,
- int number_of_threads,
- Counter max_insts_any_thread,
- Counter max_insts_all_threads,
- Counter max_loads_any_thread,
- Counter max_loads_all_threads)
- : BaseCPU(_name, number_of_threads,
- max_insts_any_thread, max_insts_all_threads,
- max_loads_any_thread, max_loads_all_threads)
+BaseFullCPU::BaseFullCPU(Params &params)
+ : BaseCPU(params.name, params.numberOfThreads,
+ params.maxInstsAnyThread, params.maxInstsAllThreads,
+ params.maxLoadsAnyThread, params.maxLoadsAllThreads)
{
}
#endif // FULL_SYSTEM
@@ -67,14 +56,9 @@ FullBetaCPU<Impl>::TickEvent::description()
template <class Impl>
FullBetaCPU<Impl>::FullBetaCPU(Params &params)
#ifdef FULL_SYSTEM
- : BaseFullCPU(params.name, /* number_of_threads */ 1,
- params.maxInstsAnyThread, params.maxInstsAllThreads,
- params.maxLoadsAnyThread, params.maxLoadsAllThreads,
- params.system, params.freq),
+ : BaseFullCPU(params),
#else
- : BaseFullCPU(params.name, /* number_of_threads */ 1,
- params.maxInstsAnyThread, params.maxInstsAllThreads,
- params.maxLoadsAnyThread, params.maxLoadsAllThreads),
+ : BaseFullCPU(params),
#endif // FULL_SYSTEM
tickEvent(this),
fetch(params),
@@ -91,17 +75,18 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
renameMap(Impl::ISA::NumIntRegs, params.numPhysIntRegs,
Impl::ISA::NumFloatRegs, params.numPhysFloatRegs,
Impl::ISA::NumMiscRegs,
- Impl::ISA::ZeroReg, Impl::ISA::ZeroReg),
+ Impl::ISA::ZeroReg,
+ Impl::ISA::ZeroReg + Impl::ISA::NumIntRegs),
rob(params.numROBEntries, params.squashWidth),
// What to pass to these time buffers?
// For now just have these time buffers be pretty big.
- timeBuffer(20, 20),
- fetchQueue(20, 20),
- decodeQueue(20, 20),
- renameQueue(20, 20),
- iewQueue(20, 20),
+ timeBuffer(5, 5),
+ fetchQueue(5, 5),
+ decodeQueue(5, 5),
+ renameQueue(5, 5),
+ iewQueue(5, 5),
xc(NULL),
@@ -133,9 +118,9 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
// initialize CPU, including PC
TheISA::initCPU(&xc->regs);
#else
- xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0);
DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x",
process->prog_entry, process);
+ xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0);
assert(process->getMemory() != NULL);
assert(mem != NULL);
@@ -393,7 +378,7 @@ FullBetaCPU<Impl>::setPC(Addr new_PC)
template <class Impl>
void
-FullBetaCPU<Impl>::addInst(DynInst *inst)
+FullBetaCPU<Impl>::addInst(DynInstPtr &inst)
{
instList.push_back(inst);
}
@@ -411,9 +396,9 @@ FullBetaCPU<Impl>::instDone()
template <class Impl>
void
-FullBetaCPU<Impl>::removeBackInst(DynInst *inst)
+FullBetaCPU<Impl>::removeBackInst(DynInstPtr &inst)
{
- DynInst *inst_to_delete;
+ DynInstPtr inst_to_delete;
// Walk through the instruction list, removing any instructions
// that were inserted after the given instruction, inst.
@@ -424,22 +409,22 @@ FullBetaCPU<Impl>::removeBackInst(DynInst *inst)
// Obtain the pointer to the instruction.
inst_to_delete = instList.back();
- DPRINTF(FullCPU, "FullCPU: Deleting instruction %#x, PC %#x\n",
- inst_to_delete, inst_to_delete->readPC());
+ DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n",
+ inst_to_delete->seqNum, inst_to_delete->readPC());
// Remove the instruction from the list.
instList.pop_back();
- // Delete the instruction itself.
- delete inst_to_delete;
+ // Mark it as squashed.
+ inst_to_delete->setSquashed();
}
}
template <class Impl>
void
-FullBetaCPU<Impl>::removeFrontInst(DynInst *inst)
+FullBetaCPU<Impl>::removeFrontInst(DynInstPtr &inst)
{
- DynInst *inst_to_delete;
+ DynInstPtr inst_to_delete;
// The front instruction should be the same one being asked to be deleted.
assert(instList.front() == inst);
@@ -451,7 +436,7 @@ FullBetaCPU<Impl>::removeFrontInst(DynInst *inst)
DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n",
inst_to_delete, inst_to_delete->readPC());
- delete inst_to_delete;
+// delete inst_to_delete;
}
template <class Impl>
@@ -461,7 +446,7 @@ FullBetaCPU<Impl>::removeInstsNotInROB()
DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
"list.\n");
- DynInst *rob_tail = rob.readTailInst();
+ DynInstPtr rob_tail = rob.readTailInst();
removeBackInst(rob_tail);
}
@@ -478,13 +463,13 @@ void
FullBetaCPU<Impl>::dumpInsts()
{
int num = 0;
- typename list<DynInst *>::iterator inst_list_it = instList.begin();
+ typename list<DynInstPtr>::iterator inst_list_it = instList.begin();
while (inst_list_it != instList.end())
{
- cprintf("Instruction:%i\nInst:%#x\nPC:%#x\nSN:%lli\n\n",
- num, (*inst_list_it), (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum);
+ cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n",
+ num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum,
+ (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed());
inst_list_it++;
++num;
}
@@ -492,7 +477,7 @@ FullBetaCPU<Impl>::dumpInsts()
template <class Impl>
void
-FullBetaCPU<Impl>::wakeDependents(DynInst *inst)
+FullBetaCPU<Impl>::wakeDependents(DynInstPtr &inst)
{
iew.wakeDependents(inst);
}
diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh
index 00ff1f878..cf753ad67 100644
--- a/cpu/beta_cpu/full_cpu.hh
+++ b/cpu/beta_cpu/full_cpu.hh
@@ -16,6 +16,7 @@
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/base_cpu.hh"
+#include "cpu/exec_context.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
#include "sim/process.hh"
@@ -28,17 +29,32 @@ class BaseFullCPU : public BaseCPU
{
//Stuff that's pretty ISA independent will go here.
public:
+ class Params
+ {
+ public:
+#ifdef FULL_SYSTEM
+ std::string name;
+ int numberOfThreads;
+ Counter maxInstsAnyThread;
+ Counter maxInstsAllThreads;
+ Counter maxLoadsAnyThread;
+ Counter maxLoadsAllThreads;
+ System *_system;
+ Tick freq;
+#else
+ std::string name;
+ int numberOfThreads;
+ Counter maxInstsAnyThread;
+ Counter maxInstsAllThreads;
+ Counter maxLoadsAnyThread;
+ Counter maxLoadsAllThreads;
+#endif // FULL_SYSTEM
+ };
+
#ifdef FULL_SYSTEM
- BaseFullCPU(const std::string &_name, int _number_of_threads,
- Counter max_insts_any_thread, Counter max_insts_all_threads,
- Counter max_loads_any_thread, Counter max_loads_all_threads,
- System *_system, Tick freq);
+ BaseFullCPU(Params &params);
#else
- BaseFullCPU(const std::string &_name, int _number_of_threads,
- Counter max_insts_any_thread = 0,
- Counter max_insts_all_threads = 0,
- Counter max_loads_any_thread = 0,
- Counter max_loads_all_threads = 0);
+ BaseFullCPU(Params &params);
#endif // FULL_SYSTEM
};
@@ -49,7 +65,7 @@ class FullBetaCPU : public BaseFullCPU
//Put typedefs from the Impl here.
typedef typename Impl::CPUPol CPUPolicy;
typedef typename Impl::Params Params;
- typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
public:
enum Status {
@@ -162,7 +178,7 @@ class FullBetaCPU : public BaseFullCPU
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
*/
- void addInst(DynInst *inst);
+ void addInst(DynInstPtr &inst);
/** Function to tell the CPU that an instruction has completed. */
void instDone();
@@ -175,7 +191,7 @@ class FullBetaCPU : public BaseFullCPU
* @todo: Remove only up until that inst? Squashed inst is most likely
* valid.
*/
- void removeBackInst(DynInst *inst);
+ void removeBackInst(DynInstPtr &inst);
/** Remove an instruction from the front of the list. It is expected
* that there are no instructions in front of it (that is, none are older
@@ -184,7 +200,7 @@ class FullBetaCPU : public BaseFullCPU
* last instruction once it's verified that commit has the same ordering
* as the instruction list.
*/
- void removeFrontInst(DynInst *inst);
+ void removeFrontInst(DynInstPtr &inst);
/** Remove all instructions that are not currently in the ROB. */
void removeInstsNotInROB();
@@ -198,11 +214,11 @@ class FullBetaCPU : public BaseFullCPU
* commit can tell the instruction queue that they have completed.
* Eventually this hack should be removed.
*/
- void wakeDependents(DynInst *inst);
+ void wakeDependents(DynInstPtr &inst);
public:
/** List of all the instructions in flight. */
- list<DynInst *> instList;
+ list<DynInstPtr> instList;
//not sure these should be private.
protected:
@@ -255,15 +271,15 @@ class FullBetaCPU : public BaseFullCPU
/** Typedefs from the Impl to get the structs that each of the
* time buffers should use.
*/
- typedef typename Impl::TimeStruct TimeStruct;
+ typedef typename CPUPolicy::TimeStruct TimeStruct;
- typedef typename Impl::FetchStruct FetchStruct;
+ typedef typename CPUPolicy::FetchStruct FetchStruct;
- typedef typename Impl::DecodeStruct DecodeStruct;
+ typedef typename CPUPolicy::DecodeStruct DecodeStruct;
- typedef typename Impl::RenameStruct RenameStruct;
+ typedef typename CPUPolicy::RenameStruct RenameStruct;
- typedef typename Impl::IEWStruct IEWStruct;
+ typedef typename CPUPolicy::IEWStruct IEWStruct;
/** The main time buffer to do backwards communication. */
TimeBuffer<TimeStruct> timeBuffer;
diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh
index 52b9ccdb0..de408ef0c 100644
--- a/cpu/beta_cpu/iew.hh
+++ b/cpu/beta_cpu/iew.hh
@@ -1,13 +1,10 @@
-//Todo: Update with statuses. Create constructor. Fix up time buffer stuff.
-//Will also need a signal heading back at least one stage to rename to say
-//how many empty skid buffer entries there are. Perhaps further back even.
+//Todo: Update with statuses.
//Need to handle delaying writes to the writeback bus if it's full at the
-//given time. Squash properly. Load store queue.
+//given time. Load store queue.
#ifndef __SIMPLE_IEW_HH__
#define __SIMPLE_IEW_HH__
-// To include: time buffer, structs, queue,
#include <queue>
#include "base/timebuf.hh"
@@ -22,16 +19,18 @@ class SimpleIEW
private:
//Typedefs from Impl
typedef typename Impl::ISA ISA;
- typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::CPUPol CPUPol;
+ typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::FullCPU FullCPU;
typedef typename Impl::Params Params;
- typedef typename Impl::CPUPol::RenameMap RenameMap;
+ typedef typename CPUPol::RenameMap RenameMap;
+ typedef typename CPUPol::LDSTQ LDSTQ;
- typedef typename Impl::TimeStruct TimeStruct;
- typedef typename Impl::IEWStruct IEWStruct;
- typedef typename Impl::RenameStruct RenameStruct;
- typedef typename Impl::IssueStruct IssueStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
+ typedef typename CPUPol::IEWStruct IEWStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+ typedef typename CPUPol::IssueStruct IssueStruct;
public:
enum Status {
@@ -51,7 +50,7 @@ class SimpleIEW
public:
void squash();
- void squash(DynInst *inst);
+ void squash(DynInstPtr &inst);
void block();
@@ -70,7 +69,7 @@ class SimpleIEW
void setRenameMap(RenameMap *rm_ptr);
- void wakeDependents(DynInst *inst);
+ void wakeDependents(DynInstPtr &inst);
void tick();
@@ -111,11 +110,13 @@ class SimpleIEW
//Will need internal queue to hold onto instructions coming from
//the rename stage in case of a stall.
/** Skid buffer between rename and IEW. */
- queue<RenameStruct> skidBuffer;
+ std::queue<RenameStruct> skidBuffer;
/** Instruction queue. */
IQ instQueue;
+ LDSTQ ldstQueue;
+
/** Pointer to rename map. Might not want this stage to directly
* access this though...
*/
diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh
index b198220f5..521ce77f6 100644
--- a/cpu/beta_cpu/iew_impl.hh
+++ b/cpu/beta_cpu/iew_impl.hh
@@ -3,8 +3,8 @@
// communication happens simultaneously. Might not be that bad really...
// it might skew stats a bit though. Issue would otherwise try to issue
// instructions that would never be executed if there were a delay; without
-// it issue will simply squash. Make this stage block properly. Make this
-// stage delay after a squash properly. Update the statuses for each stage.
+// it issue will simply squash. Make this stage block properly.
+// Update the statuses for each stage.
// Actually read instructions out of the skid buffer.
#include <queue>
@@ -15,8 +15,9 @@
template<class Impl, class IQ>
SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
: // Just make this time buffer really big for now
- issueToExecQueue(20, 20),
+ issueToExecQueue(5, 5),
instQueue(params),
+ ldstQueue(params),
commitToIEWDelay(params.commitToIEWDelay),
renameToIEWDelay(params.renameToIEWDelay),
issueToExecuteDelay(params.issueToExecuteDelay),
@@ -45,6 +46,7 @@ SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
cpu = cpu_ptr;
instQueue.setCPU(cpu_ptr);
+ ldstQueue.setCPU(cpu_ptr);
}
template<class Impl, class IQ>
@@ -96,7 +98,7 @@ SimpleIEW<Impl, IQ>::setRenameMap(RenameMap *rm_ptr)
template<class Impl, class IQ>
void
-SimpleIEW<Impl, IQ>::wakeDependents(DynInst *inst)
+SimpleIEW<Impl, IQ>::wakeDependents(DynInstPtr &inst)
{
instQueue.wakeDependents(inst);
}
@@ -150,17 +152,15 @@ SimpleIEW<Impl, IQ>::squash()
// Tell the IQ to start squashing.
instQueue.squash();
- // Tell rename to squash through the time buffer.
- // This communication may be redundant depending upon where squash()
- // is called.
-// toRename->iewInfo.squash = true;
+ // Tell the LDSTQ to start squashing.
+ ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
}
template<class Impl, class IQ>
void
-SimpleIEW<Impl, IQ>::squash(DynInst *inst)
+SimpleIEW<Impl, IQ>::squash(DynInstPtr &inst)
{
- DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC:%#x.\n",
+ DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
inst->PC);
// Perhaps leave the squashing up to the ROB stage to tell it when to
// squash?
@@ -170,8 +170,11 @@ SimpleIEW<Impl, IQ>::squash(DynInst *inst)
toRename->iewInfo.squash = true;
// Also send PC update information back to prior stages.
toRename->iewInfo.squashedSeqNum = inst->seqNum;
+ toRename->iewInfo.mispredPC = inst->readPC();
toRename->iewInfo.nextPC = inst->readCalcTarg();
- toRename->iewInfo.predIncorrect = true;
+ toRename->iewInfo.branchMispredict = true;
+ // Prediction was incorrect, so send back inverse.
+ toRename->iewInfo.branchTaken = !(inst->predTaken());
}
template<class Impl, class IQ>
@@ -229,7 +232,7 @@ SimpleIEW<Impl, IQ>::tick()
// If there's still instructions coming from rename, continue to
// put them on the skid buffer.
- if (fromRename->insts[0] != NULL) {
+ if (fromRename->insts[0]) {
block();
}
@@ -244,6 +247,19 @@ SimpleIEW<Impl, IQ>::tick()
// Write back number of free IQ entries here.
toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();
+ // Check the committed load/store signals to see if there's a load
+ // or store to commit. Also check if it's being told to execute a
+ // nonspeculative instruction.
+ if (fromCommit->commitInfo.commitIsStore) {
+ ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
+ } else if (fromCommit->commitInfo.commitIsLoad) {
+ ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
+ }
+
+ if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
+ instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
+ }
+
DPRINTF(IEW, "IEW: IQ has %i free entries.\n",
instQueue.numFreeEntries());
}
@@ -265,7 +281,7 @@ SimpleIEW<Impl, IQ>::iew()
}
////////////////////////////////////////
- //ISSUE stage
+ // DISPATCH/ISSUE stage
////////////////////////////////////////
//Put into its own function?
@@ -273,16 +289,16 @@ SimpleIEW<Impl, IQ>::iew()
// Check if there are any instructions coming from rename, and we're.
// not squashing.
- if (fromRename->insts[0] != NULL && _status != Squashing) {
+ if (fromRename->insts[0] && _status != Squashing) {
// Loop through the instructions, putting them in the instruction
// queue.
for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num)
{
- DynInst *inst = fromRename->insts[inst_num];
+ DynInstPtr inst = fromRename->insts[inst_num];
// Make sure there's a valid instruction there.
- if (inst == NULL)
+ if (!inst)
break;
DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n",
@@ -294,25 +310,38 @@ SimpleIEW<Impl, IQ>::iew()
// Be sure to mark these instructions as ready so that the
// commit stage can go ahead and execute them, and mark
// them as issued so the IQ doesn't reprocess them.
- if (inst->isMemRef()) {
+ if (inst->isSquashed()) {
+ continue;
+ } else if (inst->isLoad()) {
DPRINTF(IEW, "IEW: Issue: Memory instruction "
- "encountered, skipping.\n");
+ "encountered, adding to LDSTQ.\n");
- inst->setIssued();
- inst->setExecuted();
+ // Reserve a spot in the load store queue for this
+ // memory access.
+ ldstQueue.insertLoad(inst);
+
+ } else if (inst->isStore()) {
+ ldstQueue.insertStore(inst);
+
+ // A bit of a hack. Set that it can commit so that
+ // the commit stage will try committing it, and then
+ // once commit realizes it's a store it will send back
+ // a signal to this stage to issue and execute that
+ // store.
inst->setCanCommit();
- instQueue.advanceTail(inst);
+ instQueue.insertNonSpec(inst);
continue;
} else if (inst->isNonSpeculative()) {
DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
"encountered, skipping.\n");
- inst->setIssued();
- inst->setExecuted();
+ // Same hack as with stores.
inst->setCanCommit();
- instQueue.advanceTail(inst);
+ // Specifically insert it as nonspeculative.
+ instQueue.insertNonSpec(inst);
+
continue;
} else if (inst->isNop()) {
DPRINTF(IEW, "IEW: Issue: Nop instruction encountered "
@@ -355,6 +384,7 @@ SimpleIEW<Impl, IQ>::iew()
// @todo: Move to the FU pool used in the current full cpu.
int fu_usage = 0;
+ bool fetch_redirect = false;
// Execute/writeback any instructions that are available.
for (int inst_num = 0;
@@ -365,26 +395,48 @@ SimpleIEW<Impl, IQ>::iew()
DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n");
// Get instruction from issue's queue.
- DynInst *inst = fromIssue->insts[inst_num];
+ DynInstPtr inst = fromIssue->insts[inst_num];
DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC());
- inst->setExecuted();
-
// Check if the instruction is squashed; if so then skip it
// and don't count it towards the FU usage.
if (inst->isSquashed()) {
DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n");
+
+ // Consider this instruction executed so that commit can go
+ // ahead and retire the instruction.
+ inst->setExecuted();
+
+ toCommit->insts[inst_num] = inst;
+
continue;
}
+ inst->setExecuted();
+
// If an instruction is executed, then count it towards FU usage.
++fu_usage;
// Execute instruction.
// Note that if the instruction faults, it will be handled
// at the commit stage.
- inst->execute();
+ if (inst->isMemRef()) {
+ DPRINTF(IEW, "IEW: Execute: Calculating address for memory "
+ "reference.\n");
+
+ // Tell the LDSTQ to execute this instruction (if it is a load).
+ if (inst->isLoad()) {
+ ldstQueue.executeLoad(inst);
+ } else if (inst->isStore()) {
+ ldstQueue.executeStore();
+ } else {
+ panic("IEW: Unexpected memory type!\n");
+ }
+
+ } else {
+ inst->execute();
+ }
// First check the time slot that this instruction will write
// to. If there are free write ports at the time, then go ahead
@@ -401,16 +453,34 @@ SimpleIEW<Impl, IQ>::iew()
// Check if branch was correct. This check happens after the
// instruction is added to the queue because even if the branch
// is mispredicted, the branch instruction itself is still valid.
- if (inst->mispredicted()) {
- DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n");
- DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n",
- inst->nextPC);
+ // Only handle this if there hasn't already been something that
+ // redirects fetch in this group of instructions.
+ if (!fetch_redirect) {
+ if (inst->mispredicted()) {
+ fetch_redirect = true;
+
+ DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n");
+ DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n",
+ inst->nextPC);
+
+ // If incorrect, then signal the ROB that it must be squashed.
+ squash(inst);
+ } else if (ldstQueue.violation()) {
+ fetch_redirect = true;
+
+ DynInstPtr violator = ldstQueue.getMemDepViolator();
- // If incorrect, then signal the ROB that it must be squashed.
- squash(inst);
+ DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
- // Not sure it really needs to break.
-// break;
+ instQueue.violation(inst, violator);
+
+ squash(inst);
+ // Otherwise check if there was a memory ordering violation.
+ // If there was, then signal ROB that it must be squashed. Also
+ // signal IQ that there was a violation.
+ }
}
}
@@ -422,18 +492,20 @@ SimpleIEW<Impl, IQ>::iew()
// Either have IEW have direct access to rename map, or have this as
// part of backwards communication.
for (int inst_num = 0; inst_num < executeWidth &&
- toCommit->insts[inst_num] != NULL; inst_num++)
+ toCommit->insts[inst_num]; inst_num++)
{
- DynInst *inst = toCommit->insts[inst_num];
+ DynInstPtr inst = toCommit->insts[inst_num];
DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n",
inst->readPC());
- instQueue.wakeDependents(inst);
+ if(!inst->isSquashed()) {
+ instQueue.wakeDependents(inst);
- for (int i = 0; i < inst->numDestRegs(); i++)
- {
- renameMap->markAsReady(inst->renamedDestRegIdx(i));
+ for (int i = 0; i < inst->numDestRegs(); i++)
+ {
+ renameMap->markAsReady(inst->renamedDestRegIdx(i));
+ }
}
}
diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh
index 5741bfcf5..a170979cb 100644
--- a/cpu/beta_cpu/inst_queue.hh
+++ b/cpu/beta_cpu/inst_queue.hh
@@ -2,12 +2,13 @@
#define __INST_QUEUE_HH__
#include <list>
+#include <map>
#include <queue>
#include <stdint.h>
+#include <vector>
#include "base/timebuf.hh"
-
-using namespace std;
+#include "cpu/inst_seq.hh"
//Perhaps have a better separation between the data structure underlying
//and the actual algorithm.
@@ -24,48 +25,53 @@ using namespace std;
* and 96-191 are fp). This remains true even for both logical and
* physical register indices.
*/
-template<class Impl>
+template <class Impl>
class InstructionQueue
{
public:
//Typedefs from the Impl.
typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::Params Params;
- typedef typename Impl::IssueStruct IssueStruct;
- typedef typename Impl::TimeStruct TimeStruct;
+ typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+ typedef typename Impl::CPUPol::TimeStruct TimeStruct;
// Typedef of iterator through the list of instructions. Might be
// better to untie this from the FullCPU or pass its information to
// the stages.
- typedef typename list<DynInst *>::iterator ListIt;
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
/**
- * Class for priority queue entries. Mainly made so that the < operator
- * is defined.
+ * Struct for comparing entries to be added to the priority queue. This
+ * gives reverse ordering to the instructions in terms of sequence
+ * numbers: the instructions with smaller sequence numbers (and hence
+ * are older) will be at the top of the priority queue.
*/
- struct ReadyEntry {
- DynInst *inst;
-
- ReadyEntry(DynInst *_inst)
- : inst(_inst)
- { }
-
- /** Compare(lhs,rhs) checks if rhs is "bigger" than lhs. If so, rhs
- * goes higher on the priority queue. The oldest instruction should
- * be on the top of the instruction queue, so in this case "bigger"
- * has the reverse meaning; the instruction with the lowest
- * sequence number is on the top.
- */
- bool operator <(const ReadyEntry &rhs) const
+ struct pqCompare
+ {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
{
- if (this->inst->seqNum > rhs.inst->seqNum)
- return true;
- return false;
+ return lhs->seqNum > rhs->seqNum;
}
};
+ /**
+ * Struct for comparing entries to be added to the set. This gives
+ * standard ordering in terms of sequence numbers.
+ */
+ struct setCompare
+ {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+ {
+ return lhs->seqNum < rhs->seqNum;
+ }
+ };
+
+ typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare>
+ ReadyInstQueue;
+
InstructionQueue(Params &params);
void setCPU(FullCPU *cpu);
@@ -78,20 +84,32 @@ class InstructionQueue
bool isFull();
- void insert(DynInst *new_inst);
+ void insert(DynInstPtr &new_inst);
- void advanceTail(DynInst *inst);
+ void insertNonSpec(DynInstPtr &new_inst);
+
+ void advanceTail(DynInstPtr &inst);
void scheduleReadyInsts();
- void wakeDependents(DynInst *completed_inst);
+ void scheduleNonSpec(const InstSeqNum &inst);
- void doSquash();
+ void wakeDependents(DynInstPtr &completed_inst);
+
+ void violation(DynInstPtr &store, DynInstPtr &faulting_load);
void squash();
+ void doSquash();
+
void stopSquash();
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful sideaffects.
+ */
+ void dumpLists();
+
private:
/** Debugging function to count how many entries are in the IQ. It does
* a linear walk through the instructions, so do not call this function
@@ -103,6 +121,11 @@ class InstructionQueue
/** Pointer to the CPU. */
FullCPU *cpu;
+ /** The memory dependence unit, which tracks/predicts memory dependences
+ * between instructions.
+ */
+ MemDepUnit memDepUnit;
+
/** The queue to the execute stage. Issued instructions will be written
* into it.
*/
@@ -118,26 +141,46 @@ class InstructionQueue
Int,
Float,
Branch,
+ Memory,
+ Misc,
Squashed,
None
};
/** List of ready int instructions. Used to keep track of the order in
- * which */
- priority_queue<ReadyEntry> readyIntInsts;
+ * which instructions should issue.
+ */
+ ReadyInstQueue readyIntInsts;
/** List of ready floating point instructions. */
- priority_queue<ReadyEntry> readyFloatInsts;
+ ReadyInstQueue readyFloatInsts;
/** List of ready branch instructions. */
- priority_queue<ReadyEntry> readyBranchInsts;
+ ReadyInstQueue readyBranchInsts;
+
+ /** List of ready memory instructions. */
+ ReadyInstQueue readyMemInsts;
+
+ /** List of ready miscellaneous instructions. */
+ ReadyInstQueue readyMiscInsts;
/** List of squashed instructions (which are still valid and in IQ).
* Implemented using a priority queue; the entries must contain both
* the IQ index and sequence number of each instruction so that
* ordering based on sequence numbers can be used.
*/
- priority_queue<ReadyEntry> squashedInsts;
+ ReadyInstQueue squashedInsts;
+
+ /** List of non-speculative instructions that will be scheduled
+ * once the IQ gets a signal from commit. While it's redundant to
+ * have the key be a part of the value (the sequence number is stored
+ * inside of DynInst), when these instructions are woken up only
+ * the sequence number will be available. Thus it is necessary to be
+ * able to search by the sequence number alone.
+ */
+ std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
+
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t;
/** Number of free IQ entries left. */
unsigned freeEntries;
@@ -158,6 +201,9 @@ class InstructionQueue
/** The number of branches that can be issued in one cycle. */
unsigned branchWidth;
+ /** The number of memory instructions that can be issued in one cycle. */
+ unsigned memoryWidth;
+
/** The total number of instructions that can be issued in one cycle. */
unsigned totalWidth;
@@ -183,7 +229,7 @@ class InstructionQueue
InstSeqNum squashedSeqNum;
/** Iterator that points to the oldest instruction in the IQ. */
- ListIt head;
+// ListIt head;
/** Iterator that points to the youngest instruction in the IQ. */
ListIt tail;
@@ -200,7 +246,7 @@ class InstructionQueue
class DependencyEntry
{
public:
- DynInst *inst;
+ DynInstPtr inst;
//Might want to include data about what arch. register the
//dependence is waiting on.
DependencyEntry *next;
@@ -212,9 +258,9 @@ class InstructionQueue
//away. So for now it will sit here, within the IQ, until
//a better implementation is decided upon.
// This function probably shouldn't be within the entry...
- void insert(DynInst *new_inst);
+ void insert(DynInstPtr &new_inst);
- void remove(DynInst *inst_to_remove);
+ void remove(DynInstPtr &inst_to_remove);
};
/** Array of linked lists. Each linked list is a list of all the
@@ -233,11 +279,12 @@ class InstructionQueue
*/
vector<bool> regScoreboard;
- bool addToDependents(DynInst *new_inst);
- void insertDependency(DynInst *new_inst);
- void createDependency(DynInst *new_inst);
+ bool addToDependents(DynInstPtr &new_inst);
+ void insertDependency(DynInstPtr &new_inst);
+ void createDependency(DynInstPtr &new_inst);
+ void dumpDependGraph();
- void addIfReady(DynInst *inst);
+ void addIfReady(DynInstPtr &inst);
};
#endif //__INST_QUEUE_HH__
diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh
index 6f1f06858..03e3fed33 100644
--- a/cpu/beta_cpu/inst_queue_impl.hh
+++ b/cpu/beta_cpu/inst_queue_impl.hh
@@ -1,11 +1,8 @@
#ifndef __INST_QUEUE_IMPL_HH__
#define __INST_QUEUE_IMPL_HH__
-// Todo: Fix up consistency errors about back of the ready list being
-// the oldest instructions in the queue. When woken up from the dependency
-// graph they will be the oldest, but when they are immediately executable
-// newer instructions will mistakenly get inserted onto the back. Also
-// current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
+// Todo:
+// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
// it; either do in reverse order, or have added instructions put into a
// different ready queue that, in scheduleRreadyInsts(), gets put onto the
// normal ready queue. This would however give only a one cycle delay,
@@ -21,18 +18,21 @@
// Blatant hack to avoid compile warnings.
const InstSeqNum MaxInstSeqNum = 0 - 1;
-template<class Impl>
+template <class Impl>
InstructionQueue<Impl>::InstructionQueue(Params &params)
- : numEntries(params.numIQEntries),
+ : memDepUnit(params),
+ numEntries(params.numIQEntries),
intWidth(params.executeIntWidth),
floatWidth(params.executeFloatWidth),
+ totalWidth(params.issueWidth),
numPhysIntRegs(params.numPhysIntRegs),
numPhysFloatRegs(params.numPhysFloatRegs),
commitToIEWDelay(params.commitToIEWDelay)
{
// HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER.
- totalWidth = 1;
branchWidth = 1;
+ memoryWidth = 1;
+
DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth);
// Initialize the number of free IQ entries.
@@ -66,7 +66,7 @@ InstructionQueue<Impl>::InstructionQueue(Params &params)
}
-template<class Impl>
+template <class Impl>
void
InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr)
{
@@ -75,7 +75,7 @@ InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr)
tail = cpu->instList.begin();
}
-template<class Impl>
+template <class Impl>
void
InstructionQueue<Impl>::setIssueToExecuteQueue(
TimeBuffer<IssueStruct> *i2e_ptr)
@@ -84,7 +84,7 @@ InstructionQueue<Impl>::setIssueToExecuteQueue(
issueToExecuteQueue = i2e_ptr;
}
-template<class Impl>
+template <class Impl>
void
InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
@@ -96,7 +96,7 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
// Might want to do something more complex if it knows how many instructions
// will be issued this cycle.
-template<class Impl>
+template <class Impl>
bool
InstructionQueue<Impl>::isFull()
{
@@ -107,16 +107,16 @@ InstructionQueue<Impl>::isFull()
}
}
-template<class Impl>
+template <class Impl>
unsigned
InstructionQueue<Impl>::numFreeEntries()
{
return freeEntries;
}
-template<class Impl>
+template <class Impl>
void
-InstructionQueue<Impl>::insert(DynInst *new_inst)
+InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
{
// Make sure the instruction is valid
assert(new_inst);
@@ -157,18 +157,78 @@ InstructionQueue<Impl>::insert(DynInst *new_inst)
// register(s).
createDependency(new_inst);
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+ if (new_inst->isMemRef()) {
+ memDepUnit.insert(new_inst);
+ }
+
// If the instruction is ready then add it to the ready list.
addIfReady(new_inst);
assert(freeEntries == (numEntries - countInsts()));
}
+template <class Impl>
+void
+InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst)
+{
+ nonSpecInsts[inst->seqNum] = inst;
+
+ // @todo: Clean up this code; can do it by setting inst as unable
+ // to issue, then calling normal insert on the inst.
+
+ // Make sure the instruction is valid
+ assert(inst);
+
+ DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n",
+ inst->readPC());
+
+ // Check if there are any free entries. Panic if there are none.
+ // Might want to have this return a fault in the future instead of
+ // panicking.
+ assert(freeEntries != 0);
+
+ // If the IQ currently has nothing in it, then there's a possibility
+ // that the tail iterator is invalid (might have been pointing at an
+ // instruction that was retired). Reset the tail iterator.
+ if (freeEntries == numEntries) {
+ tail = cpu->instList.begin();
+ }
+
+ // Move the tail iterator. Instructions may not have been issued
+ // to the IQ, so we may have to increment the iterator more than once.
+ while ((*tail) != inst) {
+ tail++;
+
+ // Make sure the tail iterator points at something legal.
+ assert(tail != cpu->instList.end());
+ }
+
+ // Decrease the number of free entries.
+ --freeEntries;
+
+ // Look through its source registers (physical regs), and mark any
+ // dependencies.
+// addToDependents(inst);
+
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(inst);
+
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+ if (inst->isMemRef()) {
+ memDepUnit.insert(inst);
+ }
+}
+
// Slightly hack function to advance the tail iterator in the case that
// the IEW stage issues an instruction that is not added to the IQ. This
// is needed in case a long chain of such instructions occurs.
-template<class Impl>
+template <class Impl>
void
-InstructionQueue<Impl>::advanceTail(DynInst *inst)
+InstructionQueue<Impl>::advanceTail(DynInstPtr &inst)
{
// Make sure the instruction is valid
assert(inst);
@@ -205,10 +265,11 @@ InstructionQueue<Impl>::advanceTail(DynInst *inst)
}
// Need to make sure the number of float and integer instructions
-// issued does not exceed the total issue bandwidth. Probably should
-// have some sort of limit of total number of branches that can be issued
-// as well.
-template<class Impl>
+// issued does not exceed the total issue bandwidth.
+// @todo: Figure out a better way to remove the squashed items from the
+// lists. Checking the top item of each list to see if it's squashed
+// wastes time and forces jumps.
+template <class Impl>
void
InstructionQueue<Impl>::scheduleReadyInsts()
{
@@ -218,6 +279,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int int_issued = 0;
int float_issued = 0;
int branch_issued = 0;
+ int memory_issued = 0;
int squashed_issued = 0;
int total_issued = 0;
@@ -226,6 +288,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
bool insts_available = !readyBranchInsts.empty() ||
!readyIntInsts.empty() ||
!readyFloatInsts.empty() ||
+ !readyMemInsts.empty() ||
+ !readyMiscInsts.empty() ||
!squashedInsts.empty();
// Note: Requires a globally defined constant.
@@ -233,10 +297,12 @@ InstructionQueue<Impl>::scheduleReadyInsts()
InstList list_with_oldest = None;
// Temporary values.
- DynInst *int_head_inst;
- DynInst *float_head_inst;
- DynInst *branch_head_inst;
- DynInst *squashed_head_inst;
+ DynInstPtr int_head_inst;
+ DynInstPtr float_head_inst;
+ DynInstPtr branch_head_inst;
+ DynInstPtr mem_head_inst;
+ DynInstPtr misc_head_inst;
+ DynInstPtr squashed_head_inst;
// Somewhat nasty code to look at all of the lists where issuable
// instructions are located, and choose the oldest instruction among
@@ -257,7 +323,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
insts_available = true;
- int_head_inst = readyIntInsts.top().inst;
+ int_head_inst = readyIntInsts.top();
if (int_head_inst->isSquashed()) {
readyIntInsts.pop();
@@ -274,7 +340,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
insts_available = true;
- float_head_inst = readyFloatInsts.top().inst;
+ float_head_inst = readyFloatInsts.top();
if (float_head_inst->isSquashed()) {
readyFloatInsts.pop();
@@ -291,7 +357,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
insts_available = true;
- branch_head_inst = readyBranchInsts.top().inst;
+ branch_head_inst = readyBranchInsts.top();
if (branch_head_inst->isSquashed()) {
readyBranchInsts.pop();
@@ -304,11 +370,44 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
+ if (!readyMemInsts.empty() &&
+ memory_issued < memoryWidth) {
+
+ insts_available = true;
+
+ mem_head_inst = readyMemInsts.top();
+
+ if (mem_head_inst->isSquashed()) {
+ readyMemInsts.pop();
+ continue;
+ } else if (mem_head_inst->seqNum < oldest_inst) {
+ oldest_inst = mem_head_inst->seqNum;
+
+ list_with_oldest = Memory;
+ }
+ }
+
+ if (!readyMiscInsts.empty()) {
+
+ insts_available = true;
+
+ misc_head_inst = readyMiscInsts.top();
+
+ if (misc_head_inst->isSquashed()) {
+ readyMiscInsts.pop();
+ continue;
+ } else if (misc_head_inst->seqNum < oldest_inst) {
+ oldest_inst = misc_head_inst->seqNum;
+
+ list_with_oldest = Misc;
+ }
+ }
+
if (!squashedInsts.empty()) {
insts_available = true;
- squashed_head_inst = squashedInsts.top().inst;
+ squashed_head_inst = squashedInsts.top();
if (squashed_head_inst->seqNum < oldest_inst) {
list_with_oldest = Squashed;
@@ -316,13 +415,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
- DynInst *issuing_inst = NULL;
+ DynInstPtr issuing_inst = NULL;
switch (list_with_oldest) {
case None:
DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing "
"inst is %#x.\n", issuing_inst);
break;
+
case Int:
issuing_inst = int_head_inst;
readyIntInsts.pop();
@@ -330,6 +430,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n",
issuing_inst->readPC());
break;
+
case Float:
issuing_inst = float_head_inst;
readyFloatInsts.pop();
@@ -337,6 +438,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n",
issuing_inst->readPC());
break;
+
case Branch:
issuing_inst = branch_head_inst;
readyBranchInsts.pop();
@@ -344,6 +446,25 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n",
issuing_inst->readPC());
break;
+
+ case Memory:
+ issuing_inst = mem_head_inst;
+
+ memDepUnit.issue(mem_head_inst);
+
+ readyMemInsts.pop();
+ ++memory_issued;
+ DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n",
+ issuing_inst->readPC());
+ break;
+
+ case Misc:
+ issuing_inst = misc_head_inst;
+ readyMiscInsts.pop();
+ DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n",
+ issuing_inst->readPC());
+ break;
+
case Squashed:
issuing_inst = squashed_head_inst;
squashedInsts.pop();
@@ -366,7 +487,52 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
}
-template<class Impl>
+template <class Impl>
+void
+InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
+{
+ non_spec_it_t inst_it = nonSpecInsts.find(inst);
+
+ assert(inst_it != nonSpecInsts.end());
+
+ // Mark this instruction as ready to issue.
+ (*inst_it).second->setCanIssue();
+
+ // Now schedule the instruction.
+ addIfReady((*inst_it).second);
+
+ nonSpecInsts.erase(inst_it);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::violation(DynInstPtr &store,
+ DynInstPtr &faulting_load)
+{
+ memDepUnit.violation(store, faulting_load);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::squash()
+{
+ DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n");
+
+ // Read instruction sequence number of last instruction out of the
+ // time buffer.
+ squashedSeqNum = fromCommit->commitInfo.doneSeqNum;
+
+ // Setup the squash iterator to point to the tail.
+ squashIt = tail;
+
+ // Call doSquash.
+ doSquash();
+
+ // Also tell the memory dependence unit to squash.
+ memDepUnit.squash(squashedSeqNum);
+}
+
+template <class Impl>
void
InstructionQueue<Impl>::doSquash()
{
@@ -380,64 +546,59 @@ InstructionQueue<Impl>::doSquash()
// Squash any instructions younger than the squashed sequence number
// given.
while ((*squashIt)->seqNum > squashedSeqNum) {
- DynInst *squashed_inst = (*squashIt);
+ DynInstPtr squashed_inst = (*squashIt);
// Only handle the instruction if it actually is in the IQ and
// hasn't already been squashed in the IQ.
if (!squashed_inst->isIssued() &&
!squashed_inst->isSquashedInIQ()) {
// Remove the instruction from the dependency list.
- int8_t total_src_regs = squashed_inst->numSrcRegs();
-
- for (int src_reg_idx = 0;
- src_reg_idx < total_src_regs;
- src_reg_idx++)
- {
- // Only remove it from the dependency graph if it was
- // placed there in the first place.
- // HACK: This assumes that instructions woken up from the
- // dependency chain aren't informed that a specific src
- // register has become ready. This may not always be true
- // in the future.
- if (!squashed_inst->isReadySrcRegIdx(src_reg_idx)) {
- int8_t src_reg =
+ // Hack for now: These below don't add themselves to the
+ // dependency list, so don't try to remove them.
+ if (!squashed_inst->isNonSpeculative() &&
+ !squashed_inst->isStore()) {
+ int8_t total_src_regs = squashed_inst->numSrcRegs();
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < total_src_regs;
+ src_reg_idx++)
+ {
+ PhysRegIndex src_reg =
squashed_inst->renamedSrcRegIdx(src_reg_idx);
- dependGraph[src_reg].remove(squashed_inst);
+
+ // Only remove it from the dependency graph if it was
+ // placed there in the first place.
+ // HACK: This assumes that instructions woken up from the
+ // dependency chain aren't informed that a specific src
+ // register has become ready. This may not always be true
+ // in the future.
+ if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
+ src_reg < numPhysRegs) {
+ dependGraph[src_reg].remove(squashed_inst);
+ }
}
}
+ // Might want to also clear out the head of the dependency graph.
+
// Mark it as squashed within the IQ.
squashed_inst->setSquashedInIQ();
- ReadyEntry temp(squashed_inst);
-
- squashedInsts.push(temp);
+ squashedInsts.push(squashed_inst);
DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n",
squashed_inst->readPC());
}
- squashIt--;
- }
-}
-
-template<class Impl>
-void
-InstructionQueue<Impl>::squash()
-{
- DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n");
- // Read instruction sequence number of last instruction out of the
- // time buffer.
- squashedSeqNum = fromCommit->commitInfo.doneSeqNum;
-
- // Setup the squash iterator to point to the tail.
- squashIt = tail;
+ if (squashed_inst->isNonSpeculative() || squashed_inst->isStore()) {
+ nonSpecInsts.erase(squashed_inst->seqNum);
+ }
- // Call doSquash.
- doSquash();
+ --squashIt;
+ }
}
-template<class Impl>
+template <class Impl>
void
InstructionQueue<Impl>::stopSquash()
{
@@ -448,36 +609,9 @@ InstructionQueue<Impl>::stopSquash()
squashIt = cpu->instList.end();
}
-template<class Impl>
-int
-InstructionQueue<Impl>::countInsts()
-{
- ListIt count_it = cpu->instList.begin();
- int total_insts = 0;
-
- while (count_it != tail) {
- if (!(*count_it)->isIssued()) {
- ++total_insts;
- }
-
- count_it++;
-
- assert(count_it != cpu->instList.end());
- }
-
- // Need to count the tail iterator as well.
- if (count_it != cpu->instList.end() &&
- (*count_it) != NULL &&
- !(*count_it)->isIssued()) {
- ++total_insts;
- }
-
- return total_insts;
-}
-
-template<class Impl>
+template <class Impl>
void
-InstructionQueue<Impl>::wakeDependents(DynInst *completed_inst)
+InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
{
DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
//Look at the physical destination register of the DynInst
@@ -487,6 +621,13 @@ InstructionQueue<Impl>::wakeDependents(DynInst *completed_inst)
DependencyEntry *curr;
+ // Tell the memory dependence unit to wake any dependents on this
+ // instruction if it is a memory instruction.
+
+ if (completed_inst->isMemRef()) {
+ memDepUnit.wakeDependents(completed_inst);
+ }
+
for (int dest_reg_idx = 0;
dest_reg_idx < total_dest_regs;
dest_reg_idx++)
@@ -507,7 +648,7 @@ InstructionQueue<Impl>::wakeDependents(DynInst *completed_inst)
//Maybe abstract this part into a function.
//Go through the dependency chain, marking the registers as ready
//within the waiting instructions.
- while (dependGraph[dest_reg].next != NULL) {
+ while (dependGraph[dest_reg].next) {
curr = dependGraph[dest_reg].next;
@@ -537,9 +678,9 @@ InstructionQueue<Impl>::wakeDependents(DynInst *completed_inst)
}
}
-template<class Impl>
+template <class Impl>
bool
-InstructionQueue<Impl>::addToDependents(DynInst *new_inst)
+InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
{
// Loop through the instruction's source registers, adding
// them to the dependency list if they are not ready.
@@ -558,7 +699,9 @@ InstructionQueue<Impl>::addToDependents(DynInst *new_inst)
// hasn't become ready while the instruction was in flight
// between stages. Only if it really isn't ready should
// it be added to the dependency graph.
- if (regScoreboard[src_reg] == false) {
+ if (src_reg >= numPhysRegs) {
+ continue;
+ } else if (regScoreboard[src_reg] == false) {
DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that "
"is being added to the dependency chain.\n",
new_inst->readPC(), src_reg);
@@ -581,9 +724,9 @@ InstructionQueue<Impl>::addToDependents(DynInst *new_inst)
return return_val;
}
-template<class Impl>
+template <class Impl>
void
-InstructionQueue<Impl>::createDependency(DynInst *new_inst)
+InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
{
//Actually nothing really needs to be marked when an
//instruction becomes the producer of a register's value,
@@ -595,20 +738,32 @@ InstructionQueue<Impl>::createDependency(DynInst *new_inst)
dest_reg_idx < total_dest_regs;
dest_reg_idx++)
{
- int8_t dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
- dependGraph[dest_reg].inst = new_inst;
- if (dependGraph[dest_reg].next != NULL) {
- panic("Dependency chain is not empty.\n");
+ PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Instructions that use the misc regs will have a reg number
+ // higher than the normal physical registers. In this case these
+ // registers are not renamed, and there is no need to track
+ // dependencies as these instructions must be executed at commit.
+ if (dest_reg >= numPhysRegs) {
+ continue;
}
+ dependGraph[dest_reg].inst = new_inst;
+#if 0
+ if (dependGraph[dest_reg].next) {
+ panic("Dependency chain of dest reg %i is not empty.\n",
+ dest_reg);
+ }
+#endif
+ assert(!dependGraph[dest_reg].next);
// Mark the scoreboard to say it's not yet ready.
regScoreboard[dest_reg] = false;
}
}
-template<class Impl>
+template <class Impl>
void
-InstructionQueue<Impl>::DependencyEntry::insert(DynInst *new_inst)
+InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
{
//Add this new, dependent instruction at the head of the dependency
//chain.
@@ -623,9 +778,9 @@ InstructionQueue<Impl>::DependencyEntry::insert(DynInst *new_inst)
this->next = new_entry;
}
-template<class Impl>
+template <class Impl>
void
-InstructionQueue<Impl>::DependencyEntry::remove(DynInst *inst_to_remove)
+InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
{
DependencyEntry *prev = this;
DependencyEntry *curr = this->next;
@@ -643,6 +798,8 @@ InstructionQueue<Impl>::DependencyEntry::remove(DynInst *inst_to_remove)
{
prev = curr;
curr = curr->next;
+
+ assert(curr != NULL);
}
// Now remove this instruction from the list.
@@ -651,34 +808,140 @@ InstructionQueue<Impl>::DependencyEntry::remove(DynInst *inst_to_remove)
delete curr;
}
-template<class Impl>
+ /**
+  * Debug helper: prints one line per physical register showing the PC of
+  * the producer recorded at the head of its dependency chain (if any),
+  * followed by the PCs of every consumer linked behind it.
+  */
+ template <class Impl>
+ void
+ InstructionQueue<Impl>::dumpDependGraph()
+ {
+     for (int reg = 0; reg < numPhysRegs; ++reg) {
+         DependencyEntry *head = &dependGraph[reg];
+
+         // The head entry holds the producer, when there is one.
+         if (head->inst) {
+             cprintf("dependGraph[%i]: producer: %#x consumer: ", reg,
+                     head->inst->readPC());
+         } else {
+             cprintf("dependGraph[%i]: No producer. consumer: ", reg);
+         }
+
+         // Everything chained after the head is a consumer.
+         for (DependencyEntry *node = head->next;
+              node != NULL;
+              node = node->next)
+         {
+             cprintf("%#x ", node->inst->readPC());
+         }
+
+         cprintf("\n");
+     }
+ }
+
+template <class Impl>
void
-InstructionQueue<Impl>::addIfReady(DynInst *inst)
+InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
{
//If the instruction now has all of its source registers
// available, then add it to the list of ready instructions.
if (inst->readyToIssue()) {
- ReadyEntry to_add(inst);
+
//Add the instruction to the proper ready list.
- if (inst->isInteger()) {
+ if (inst->isControl()) {
+
+ DPRINTF(IQ, "IQ: Branch instruction is ready to issue, "
+ "putting it onto the ready list, PC %#x.\n",
+ inst->readPC());
+ readyBranchInsts.push(inst);
+
+ } else if (inst->isMemRef()) {
+
+ DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n");
+
+ if (memDepUnit.readyToIssue(inst)) {
+ DPRINTF(IQ, "IQ: Memory instruction is ready to issue, "
+ "putting it onto the ready list, PC %#x.\n",
+ inst->readPC());
+ readyMemInsts.push(inst);
+ }
+
+ } else if (inst->isInteger()) {
+
DPRINTF(IQ, "IQ: Integer instruction is ready to issue, "
"putting it onto the ready list, PC %#x.\n",
inst->readPC());
- readyIntInsts.push(to_add);
+ readyIntInsts.push(inst);
+
} else if (inst->isFloating()) {
+
DPRINTF(IQ, "IQ: Floating instruction is ready to issue, "
"putting it onto the ready list, PC %#x.\n",
inst->readPC());
- readyFloatInsts.push(to_add);
- } else if (inst->isControl()) {
- DPRINTF(IQ, "IQ: Branch instruction is ready to issue, "
- "putting it onto the ready list, PC %#x.\n",
- inst->readPC());
- readyBranchInsts.push(to_add);
+ readyFloatInsts.push(inst);
+
} else {
- panic("IQ: Instruction not an expected type.\n");
+ DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, "
+ "putting it onto the ready list, PC %#x..\n",
+ inst->readPC());
+
+ readyMiscInsts.push(inst);
}
}
}
+ /**
+  * Counts the instructions in the CPU's instruction list, from its start up
+  * to and including the IQ tail, that have not been issued yet.
+  *
+  * @return Number of un-issued instructions found.
+  */
+ template <class Impl>
+ int
+ InstructionQueue<Impl>::countInsts()
+ {
+     int unissued = 0;
+     ListIt walker = cpu->instList.begin();
+
+     // Everything strictly before the tail.
+     while (walker != tail) {
+         unissued += (*walker)->isIssued() ? 0 : 1;
+
+         ++walker;
+
+         // The tail must be reached before running off the list.
+         assert(walker != cpu->instList.end());
+     }
+
+     // The tail entry itself, provided it refers to a real instruction.
+     const bool tail_is_unissued = walker != cpu->instList.end() &&
+                                   (*walker) &&
+                                   !(*walker)->isIssued();
+     if (tail_is_unissued) {
+         ++unissued;
+     }
+
+     return unissued;
+ }
+
+ /**
+  * Debug helper: prints the size of each ready list, the squashed list and
+  * the non-speculative list, then the PC of every instruction currently
+  * held in the non-speculative list.
+  */
+ template <class Impl>
+ void
+ InstructionQueue<Impl>::dumpLists()
+ {
+     // One line per list with its current size.
+     cprintf("Ready integer list size: %i\n", readyIntInsts.size());
+     cprintf("Ready float list size: %i\n", readyFloatInsts.size());
+     cprintf("Ready branch list size: %i\n", readyBranchInsts.size());
+     cprintf("Ready memory list size: %i\n", readyMemInsts.size());
+     cprintf("Ready misc list size: %i\n", readyMiscInsts.size());
+     cprintf("Squashed list size: %i\n", squashedInsts.size());
+     cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
+
+     // Contents of the non-speculative list, as PCs on a single line.
+     cprintf("Non speculative list: ");
+
+     for (non_spec_it_t entry = nonSpecInsts.begin();
+          entry != nonSpecInsts.end();
+          ++entry)
+     {
+         cprintf("%#x ", entry->second->readPC());
+     }
+
+     cprintf("\n");
+ }
+
+
#endif // __INST_QUEUE_IMPL_HH__
diff --git a/cpu/beta_cpu/mem_dep_unit.cc b/cpu/beta_cpu/mem_dep_unit.cc
new file mode 100644
index 000000000..3175997f6
--- /dev/null
+++ b/cpu/beta_cpu/mem_dep_unit.cc
@@ -0,0 +1,9 @@
+
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
+#include "cpu/beta_cpu/store_set.hh"
+#include "cpu/beta_cpu/mem_dep_unit_impl.hh"
+
+ // Force instantiation of memory dependency unit using store sets and
+ // AlphaSimpleImpl. An explicit instantiation of a class template must
+ // name the class-key ("template class ..."); omitting it is ill-formed.
+ template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh
new file mode 100644
index 000000000..4821c63b7
--- /dev/null
+++ b/cpu/beta_cpu/mem_dep_unit.hh
@@ -0,0 +1,70 @@
+
+#ifndef __MEM_DEP_UNIT_HH__
+#define __MEM_DEP_UNIT_HH__
+
+ #include <map>
+ #include <set>
+ #include <vector>
+
+ #include "cpu/inst_seq.hh"
+
+/**
+ * Memory dependency unit class. This holds the memory dependence predictor.
+ * As memory operations are issued to the IQ, they are also issued to this
+ * unit, which then looks up the prediction as to what they are dependent
+ * upon. This unit must be checked prior to a memory operation being able
+ * to issue. Although this is templated, it's somewhat hard to make a generic
+ * memory dependence unit. This one is mostly for store sets; it will be
+ * quite limited in what other memory dependence predictions it can also
+ * utilize. Thus this class should be most likely be rewritten for other
+ * dependence prediction schemes.
+ */
+template <class MemDepPred, class Impl>
+class MemDepUnit {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ public:
+ typedef typename std::set<InstSeqNum>::iterator sn_it_t;
+ typedef typename std::map<InstSeqNum, vector<InstSeqNum> >::iterator
+ dep_it_t;
+
+ public:
+ MemDepUnit(Params &params);
+
+ void insert(DynInstPtr &inst);
+
+ bool readyToIssue(DynInstPtr &inst);
+
+ void issue(DynInstPtr &inst);
+
+ void wakeDependents(DynInstPtr &inst);
+
+ void squash(const InstSeqNum &squashed_num);
+
+ void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
+
+ private:
+ /** List of instructions that have passed through rename, yet are still
+ * waiting on a memory dependence to resolve before they can issue.
+ */
+ std::set<InstSeqNum> renamedInsts;
+
+ /** List of instructions that have all their predicted memory dependences
+ * resolved. They are ready in terms of being free of memory
+ * dependences; however they may still have to wait on source registers.
+ */
+ std::set<InstSeqNum> readyInsts;
+
+ std::map<InstSeqNum, vector<InstSeqNum> > dependencies;
+
+ /** The memory dependence predictor. It is accessed upon new
+ * instructions being added to the IQ, and responds by telling
+ * this unit what instruction the newly added instruction is dependent
+ * upon.
+ */
+ MemDepPred depPred;
+
+};
+
+#endif
diff --git a/cpu/beta_cpu/mem_dep_unit_impl.hh b/cpu/beta_cpu/mem_dep_unit_impl.hh
new file mode 100644
index 000000000..4299acb7a
--- /dev/null
+++ b/cpu/beta_cpu/mem_dep_unit_impl.hh
@@ -0,0 +1,166 @@
+
+#include <map>
+
+#include "cpu/beta_cpu/mem_dep_unit.hh"
+
+ // Hack: dependence predictor sizes are hardcoded.
+ // Constructs the unit and its predictor with the fixed arguments
+ // (4028, 128); `params` is accepted but not read here.
+ // NOTE(review): what the two arguments mean is defined by MemDepPred's
+ // constructor, which is not visible from this file. 4028 is not a power
+ // of two -- confirm whether 4096 was intended.
+ template <class MemDepPred, class Impl>
+ MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params &params)
+ : depPred(4028, 128)
+ {
+ DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n");
+ }
+
+ /**
+  * Registers a memory instruction with the unit.
+  *
+  * The predictor is asked which store (if any) the instruction is predicted
+  * to depend on. If that store has an entry in `dependencies`, the
+  * instruction is parked on the renamed list and appended to the store's
+  * dependents; otherwise it goes straight onto the ready list. A store is
+  * additionally recorded with the predictor and given an (initially empty)
+  * entry in `dependencies`. Anything that is neither a store nor a load
+  * triggers a panic.
+  */
+ template <class MemDepPred, class Impl>
+ void
+ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
+ {
+     InstSeqNum seq_num = inst->seqNum;
+
+     InstSeqNum predicted_store = depPred.checkInst(inst->readPC());
+
+     // A prediction of 0 means "no producer"; only look the store up when
+     // there actually is one.
+     dep_it_t producer_it = dependencies.end();
+     if (predicted_store != 0) {
+         producer_it = dependencies.find(predicted_store);
+     }
+
+     if (producer_it != dependencies.end()) {
+         // The producing store is still tracked: wait on it.
+         renamedInsts.insert(seq_num);
+
+         producer_it->second.push_back(seq_num);
+     } else {
+         // Nothing to wait for as far as memory dependences go.
+         readyInsts.insert(seq_num);
+     }
+
+     if (!inst->isStore()) {
+         if (!inst->isLoad()) {
+             panic("MemDepUnit: Unknown type! (most likely a barrier).");
+         }
+         return;
+     }
+
+     depPred.insertStore(inst->readPC(), seq_num);
+
+     // A store must not have been entered before.
+     assert(dependencies.find(seq_num) == dependencies.end());
+
+     // operator[] default-constructs an empty dependents list for the
+     // store; later inserts append to it.
+     dependencies[seq_num];
+ }
+
+ /**
+  * Tells whether an instruction is free of predicted memory dependences.
+  *
+  * @return true if the instruction's sequence number is on the ready list.
+  */
+ template <class MemDepPred, class Impl>
+ bool
+ MemDepUnit<MemDepPred, Impl>::readyToIssue(DynInstPtr &inst)
+ {
+     InstSeqNum seq_num = inst->seqNum;
+
+     return readyInsts.find(seq_num) != readyInsts.end();
+ }
+
+ /**
+  * Records that an instruction has issued by taking its sequence number
+  * off the ready list. The instruction is expected to be on that list.
+  */
+ template <class MemDepPred, class Impl>
+ void
+ MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
+ {
+     InstSeqNum issued_seq = inst->seqNum;
+
+     assert(readyInsts.find(issued_seq) != readyInsts.end());
+
+     // Erase by key so that a missing entry is simply a no-op when
+     // asserts are compiled out.
+     readyInsts.erase(issued_seq);
+ }
+
+ /**
+  * Wakes every instruction that was recorded as waiting on `inst`.
+  *
+  * Each dependent still present in the renamed list is moved to the ready
+  * list. The first dependent that is no longer in the renamed list is taken
+  * to mean the rest were squashed, and processing stops there. The store's
+  * entry in `dependencies` is removed in either case.
+  *
+  * @param inst The completed instruction; if it has no entry in
+  *             `dependencies` (expected for loads) nothing happens.
+  */
+ template <class MemDepPred, class Impl>
+ void
+ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
+ {
+     // `dependencies` is keyed by sequence number (see insert()), so the
+     // lookup must use the instruction's seqNum, not the pointer itself.
+     dep_it_t dep_it = dependencies.find(inst->seqNum);
+
+     // If there's no entry, then return. Really there should only be
+     // no entry if the instruction is a load.
+     if (dep_it == dependencies.end()) {
+         return;
+     }
+
+     assert(inst->isStore());
+
+     for (size_t i = 0; i < dep_it->second.size(); ++i) {
+         InstSeqNum woken_inst = dep_it->second[i];
+
+         // Should we have reached instructions that are actually squashed,
+         // there will be no more useful instructions in this dependency
+         // list. Break out early.
+         if (renamedInsts.find(woken_inst) == renamedInsts.end()) {
+             DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x "
+                     "are squashed, starting at SN %i. Breaking early.\n",
+                     inst->readPC(), woken_inst);
+             break;
+         }
+
+         // Remove it from the renamed instructions.
+         renamedInsts.erase(woken_inst);
+
+         // Add it to the ready list.
+         readyInsts.insert(woken_inst);
+     }
+
+     dependencies.erase(dep_it);
+ }
+
+ /**
+  * Discards all state belonging to instructions younger than the squash
+  * point, then tells the dependence predictor to squash as well.
+  *
+  * @param squashed_num Sequence number of the last instruction to keep;
+  *                     every entry with a greater sequence number is
+  *                     removed from the renamed list, the ready list and
+  *                     the dependency map.
+  */
+ template <class MemDepPred, class Impl>
+ void
+ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num)
+ {
+     // The sets and the map are ordered by sequence number, so the entries
+     // to drop are exactly [upper_bound(squashed_num), end()). A range
+     // erase handles the empty case and avoids stepping an iterator back
+     // past begin(), which the previous erase(it--) loops did when they
+     // removed the last remaining element.
+     renamedInsts.erase(renamedInsts.upper_bound(squashed_num),
+                        renamedInsts.end());
+
+     readyInsts.erase(readyInsts.upper_bound(squashed_num),
+                      readyInsts.end());
+
+     dependencies.erase(dependencies.upper_bound(squashed_num),
+                        dependencies.end());
+
+     // Tell the dependency predictor to squash as well.
+     depPred.squash(squashed_num);
+ }
+
+ // Reports a violation to the dependence predictor, passing the PC of the
+ // violating load followed by the PC of the store.
+ template <class MemDepPred, class Impl>
+ void
+ MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst,
+ DynInstPtr &violating_load)
+ {
+ // Tell the memory dependence predictor of the violation. Note the
+ // argument order: load PC first, then store PC.
+ depPred.violation(violating_load->readPC(), store_inst->readPC());
+ }
diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh
index 21e0ce218..aba897fdc 100644
--- a/cpu/beta_cpu/regfile.hh
+++ b/cpu/beta_cpu/regfile.hh
@@ -13,11 +13,11 @@ using namespace std;
// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
// and should go in the AlphaFullCPU.
-template<class Impl>
+template <class Impl>
class PhysRegFile
{
//Note that most of the definitions of the IntReg, FloatReg, etc. exist
- //within the Impl class and not within this PhysRegFile class.
+ //within the Impl/ISA class and not within this PhysRegFile class.
//Will need some way to allow stuff like swap_palshadow to access the
//correct registers. Might require code changes to swap_palshadow and
@@ -42,6 +42,8 @@ class PhysRegFile
uint64_t readIntReg(PhysRegIndex reg_idx)
{
+ assert(reg_idx < numPhysicalIntRegs);
+
DPRINTF(IEW, "RegFile: Access to int register %i, has data "
"%i\n", int(reg_idx), intRegFile[reg_idx]);
return intRegFile[reg_idx];
@@ -52,8 +54,10 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- DPRINTF(IEW, "RegFile: Access to float register %i, has data "
- "%f\n", int(reg_idx), (float)floatRegFile[reg_idx].d);
+ assert(reg_idx < numPhysicalFloatRegs);
+
+ DPRINTF(IEW, "RegFile: Access to float register %i as single, has "
+ "data %8.8f\n", int(reg_idx), (float)floatRegFile[reg_idx].d);
return (float)floatRegFile[reg_idx].d;
}
@@ -63,8 +67,10 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- DPRINTF(IEW, "RegFile: Access to float register %i, has data "
- "%f\n", int(reg_idx), floatRegFile[reg_idx].d);
+ assert(reg_idx < numPhysicalFloatRegs);
+
+ DPRINTF(IEW, "RegFile: Access to float register %i as double, has "
+ " data %8.8f\n", int(reg_idx), floatRegFile[reg_idx].d);
return floatRegFile[reg_idx].d;
}
@@ -74,14 +80,18 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- DPRINTF(IEW, "RegFile: Access to float register %i, has data "
- "%f\n", int(reg_idx), floatRegFile[reg_idx].q);
+ assert(reg_idx < numPhysicalFloatRegs);
+
+ DPRINTF(IEW, "RegFile: Access to float register %i as int, has data "
+ "%lli\n", int(reg_idx), floatRegFile[reg_idx].q);
return floatRegFile[reg_idx].q;
}
void setIntReg(PhysRegIndex reg_idx, uint64_t val)
{
+ assert(reg_idx < numPhysicalIntRegs);
+
DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n",
int(reg_idx), val);
@@ -93,7 +103,9 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- DPRINTF(IEW, "RegFile: Setting float register %i to %f\n",
+ assert(reg_idx < numPhysicalFloatRegs);
+
+ DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n",
int(reg_idx), val);
floatRegFile[reg_idx].d = (double)val;
@@ -104,7 +116,9 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- DPRINTF(IEW, "RegFile: Setting float register %i to %f\n",
+ assert(reg_idx < numPhysicalFloatRegs);
+
+ DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n",
int(reg_idx), val);
floatRegFile[reg_idx].d = val;
@@ -115,6 +129,8 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
+ assert(reg_idx < numPhysicalFloatRegs);
+
DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
int(reg_idx), val);
@@ -185,7 +201,7 @@ class PhysRegFile
unsigned numPhysicalFloatRegs;
};
-template<class Impl>
+template <class Impl>
PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs)
: numPhysicalIntRegs(_numPhysicalIntRegs),
@@ -203,7 +219,7 @@ PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
//Problem: This code doesn't make sense at the RegFile level because it
//needs things such as the itb and dtb. Either put it at the CPU level or
//the DynInst level.
-template<class Impl>
+template <class Impl>
uint64_t
PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
{
@@ -319,7 +335,7 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
int break_ipl = -1;
#endif
-template<class Impl>
+template <class Impl>
Fault
PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
{
diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh
index cd66ce686..9f031012a 100644
--- a/cpu/beta_cpu/rename.hh
+++ b/cpu/beta_cpu/rename.hh
@@ -1,25 +1,14 @@
// Todo:
-// Figure out rename map for reg vs fp (probably just have one rename map).
-// In simple case, there is no renaming, so have this stage do basically
-// nothing.
-// Fix up trap and barrier handling. Fix up squashing too, as it's too
-// dependent upon the iew stage continually telling it to squash.
-// Have commit send back information whenever a branch has committed. This
-// way the history buffer can be cleared beyond the point where the branch
-// was.
+// Fix up trap and barrier handling.
+// May want to have different statuses to differentiate the different stall
+// conditions.
#ifndef __SIMPLE_RENAME_HH__
#define __SIMPLE_RENAME_HH__
-//Will want to include: time buffer, structs, free list, rename map
#include <list>
#include "base/timebuf.hh"
-#include "cpu/beta_cpu/comm.hh"
-#include "cpu/beta_cpu/rename_map.hh"
-#include "cpu/beta_cpu/free_list.hh"
-
-using namespace std;
// Will need rename maps for both the int reg file and fp reg file.
// Or change rename map class to handle both. (RegFile handles both.)
@@ -30,14 +19,14 @@ class SimpleRename
// Typedefs from the Impl.
typedef typename Impl::ISA ISA;
typedef typename Impl::CPUPol CPUPol;
- typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::FullCPU FullCPU;
typedef typename Impl::Params Params;
- typedef typename Impl::FetchStruct FetchStruct;
- typedef typename Impl::DecodeStruct DecodeStruct;
- typedef typename Impl::RenameStruct RenameStruct;
- typedef typename Impl::TimeStruct TimeStruct;
+ typedef typename CPUPol::FetchStruct FetchStruct;
+ typedef typename CPUPol::DecodeStruct DecodeStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
// Typedefs from the CPUPol
typedef typename CPUPol::FreeList FreeList;
@@ -94,6 +83,14 @@ class SimpleRename
void removeFromHistory(InstSeqNum inst_seq_num);
+ inline void renameSrcRegs(DynInstPtr &inst);
+
+ inline void renameDestRegs(DynInstPtr &inst);
+
+ inline int calcFreeROBEntries();
+
+ inline int calcFreeIQEntries();
+
/** Holds the previous information for each rename.
* Note that often times the inst may have been deleted, so only access
* the pointer for the address and do not dereference it.
@@ -123,7 +120,7 @@ class SimpleRename
bool placeHolder;
};
- list<RenameHistory> historyBuffer;
+ std::list<RenameHistory> historyBuffer;
/** CPU interface. */
FullCPU *cpu;
@@ -155,7 +152,7 @@ class SimpleRename
typename TimeBuffer<DecodeStruct>::wire fromDecode;
/** Skid buffer between rename and decode. */
- queue<DecodeStruct> skidBuffer;
+ std::queue<DecodeStruct> skidBuffer;
/** Rename map interface. */
SimpleRenameMap *renameMap;
@@ -179,6 +176,12 @@ class SimpleRename
* instructions might have freed registers in the previous cycle.
*/
unsigned commitWidth;
+
+ /** The instruction that rename is currently on. It needs to have
+ * persistent state so that when a stall occurs in the middle of a
+ * group of instructions, it can restart at the proper instruction.
+ */
+ unsigned numInst;
};
#endif // __SIMPLE_RENAME_HH__
diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh
index 2b60c2f50..47464d961 100644
--- a/cpu/beta_cpu/rename_impl.hh
+++ b/cpu/beta_cpu/rename_impl.hh
@@ -2,18 +2,19 @@
#include "cpu/beta_cpu/rename.hh"
-template<class Impl>
+template <class Impl>
SimpleRename<Impl>::SimpleRename(Params &params)
: iewToRenameDelay(params.iewToRenameDelay),
decodeToRenameDelay(params.decodeToRenameDelay),
commitToRenameDelay(params.commitToRenameDelay),
renameWidth(params.renameWidth),
- commitWidth(params.commitWidth)
+ commitWidth(params.commitWidth),
+ numInst(0)
{
_status = Idle;
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr)
{
@@ -21,7 +22,7 @@ SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr)
cpu = cpu_ptr;
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
@@ -38,7 +39,7 @@ SimpleRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
toDecode = timeBuffer->getWire(0);
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
@@ -49,7 +50,7 @@ SimpleRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
toIEW = renameQueue->getWire(0);
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
@@ -61,7 +62,7 @@ SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::setRenameMap(RenameMap *rm_ptr)
{
@@ -69,7 +70,7 @@ SimpleRename<Impl>::setRenameMap(RenameMap *rm_ptr)
renameMap = rm_ptr;
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::setFreeList(FreeList *fl_ptr)
{
@@ -77,7 +78,7 @@ SimpleRename<Impl>::setFreeList(FreeList *fl_ptr)
freeList = fl_ptr;
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::dumpHistory()
{
@@ -93,7 +94,7 @@ SimpleRename<Impl>::dumpHistory()
}
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::block()
{
@@ -110,12 +111,12 @@ SimpleRename<Impl>::block()
// the previous stages are expected to check all possible stall signals.
}
-template<class Impl>
+template <class Impl>
inline void
SimpleRename<Impl>::unblock()
{
- DPRINTF(Rename, "Rename: Reading instructions out of skid "
- "buffer.\n");
+ DPRINTF(Rename, "Rename: Read instructions out of skid buffer this "
+ "cycle.\n");
// Remove the now processed instructions from the skid buffer.
skidBuffer.pop();
@@ -130,12 +131,12 @@ SimpleRename<Impl>::unblock()
}
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::doSquash()
{
typename list<RenameHistory>::iterator hb_it = historyBuffer.begin();
- typename list<RenameHistory>::iterator delete_it;
+// typename list<RenameHistory>::iterator delete_it;
InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum;
@@ -166,15 +167,17 @@ SimpleRename<Impl>::doSquash()
freeList->addReg(hb_it->newPhysReg);
}
- delete_it = hb_it;
+// delete_it = hb_it;
+
+// hb_it++;
- hb_it++;
+ historyBuffer.erase(hb_it++);
- historyBuffer.erase(delete_it);
+ assert(hb_it != historyBuffer.end());
}
}
-template<class Impl>
+template <class Impl>
void
SimpleRename<Impl>::squash()
{
@@ -182,6 +185,8 @@ SimpleRename<Impl>::squash()
// Set the status to Squashing.
_status = Squashing;
+ numInst = 0;
+
// Clear the skid buffer in case it has any data in it.
while (!skidBuffer.empty())
{
@@ -199,10 +204,10 @@ void
SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
{
DPRINTF(Rename, "Rename: Removing a committed instruction from the "
- "history buffer, sequence number %lli.\n", inst_seq_num);
+ "history buffer, until sequence number %lli.\n", inst_seq_num);
typename list<RenameHistory>::iterator hb_it = historyBuffer.end();
- hb_it--;
+ --hb_it;
if (hb_it->instSeqNum > inst_seq_num) {
DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure "
@@ -210,7 +215,7 @@ SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
return;
}
- for ( ; hb_it->instSeqNum != inst_seq_num; hb_it--)
+ while ((*hb_it).instSeqNum != inst_seq_num)
{
// Make sure we haven't gone off the end of the list.
assert(hb_it != historyBuffer.end());
@@ -222,10 +227,19 @@ SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
// be the last instruction in the list, as it is the instruction
// that was just committed that is being removed.
assert(hb_it->instSeqNum < inst_seq_num);
- DPRINTF(Rename, "Rename: Committed instruction is not the last "
- "entry in the history buffer.\n");
+ DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence"
+ " number %i.\n",
+ (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
+
+ if (!(*hb_it).placeHolder) {
+ freeList->addReg((*hb_it).prevPhysReg);
+ }
+
+ historyBuffer.erase(hb_it--);
}
+ // Finally free up the previous register of the squashed instruction
+ // itself.
if (!(*hb_it).placeHolder) {
freeList->addReg(hb_it->prevPhysReg);
}
@@ -234,6 +248,113 @@ SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
}
+template <class Impl>
+inline void
+SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst)
+{
+ unsigned num_src_regs = inst->numSrcRegs();
+
+ // Get the architectural register numbers of the source operands and
+ // redirect them to the physical registers they are currently renamed to.
+ // Will need to mark dependencies though.
+ for (int src_idx = 0; src_idx < num_src_regs; src_idx++)
+ {
+ RegIndex src_reg = inst->srcRegIdx(src_idx);
+
+ // Look up the source registers to get the phys. register they've
+ // been renamed to, and set the sources to those registers.
+ RegIndex renamed_reg = renameMap->lookup(src_reg);
+
+ DPRINTF(Rename, "Rename: Looking up arch reg %i, got "
+ "physical reg %i.\n", (int)src_reg, (int)renamed_reg);
+
+ inst->renameSrcReg(src_idx, renamed_reg);
+
+ // Either incorporate it into the info passed back,
+ // or make another function call to see if that register is
+ // ready or not.
+ if (renameMap->isReady(renamed_reg)) {
+ DPRINTF(Rename, "Rename: Register is ready.\n");
+
+ inst->markSrcRegReady(src_idx);
+ }
+ }
+}
+
+template <class Impl>
+inline void
+SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst)
+{
+ typename SimpleRenameMap::RenameInfo rename_result;
+
+ unsigned num_dest_regs = inst->numDestRegs();
+
+ // Rename the destination registers.
+ for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++)
+ {
+ RegIndex dest_reg = inst->destRegIdx(dest_idx);
+
+ // Get the physical register that the destination will be
+ // renamed to.
+ rename_result = renameMap->rename(dest_reg);
+
+ DPRINTF(Rename, "Rename: Renaming arch reg %i to physical "
+ "reg %i.\n", (int)dest_reg,
+ (int)rename_result.first);
+
+ // Record the rename information so that a history can be kept.
+ RenameHistory hb_entry(inst->seqNum, dest_reg,
+ rename_result.first,
+ rename_result.second);
+
+ historyBuffer.push_front(hb_entry);
+
+ DPRINTF(Rename, "Rename: Adding instruction to history buffer, "
+ "sequence number %lli.\n",
+ (*historyBuffer.begin()).instSeqNum);
+
+ // Tell the instruction to rename the appropriate destination
+ // register (dest_idx) to the new physical register
+ // (rename_result.first), and record the previous physical
+ // register that the same logical register was renamed to
+ // (rename_result.second).
+ inst->renameDestReg(dest_idx,
+ rename_result.first,
+ rename_result.second);
+ }
+
+ // If it's an instruction with no destination registers, then put
+ // a placeholder within the history buffer. It might be better
+ // to not put it in the history buffer at all (other than branches,
+ // which always need at least a place holder), and differentiate
+ // between instructions with and without destination registers
+ // when getting from commit the instructions that committed.
+ if (num_dest_regs == 0) {
+ RenameHistory hb_entry(inst->seqNum);
+
+ historyBuffer.push_front(hb_entry);
+
+ DPRINTF(Rename, "Rename: Adding placeholder instruction to "
+ "history buffer, sequence number %lli.\n",
+ inst->seqNum);
+ }
+}
+
+template <class Impl>
+inline int
+SimpleRename<Impl>::calcFreeROBEntries()
+{
+ return fromCommit->commitInfo.freeROBEntries -
+ renameWidth * iewToRenameDelay;
+}
+
+template <class Impl>
+inline int
+SimpleRename<Impl>::calcFreeIQEntries()
+{
+ return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay;
+}
+
template<class Impl>
void
SimpleRename<Impl>::tick()
@@ -258,12 +379,18 @@ SimpleRename<Impl>::tick()
// buffer were used. Remove those instructions and handle
// the rest of unblocking.
if (_status == Unblocking) {
+ if (fromDecode->size > 0) {
+ // Add the current inputs onto the skid buffer, so they can be
+ // reprocessed when this stage unblocks.
+ skidBuffer.push(*fromDecode);
+ }
+
unblock();
}
} else if (_status == Blocked) {
// If stage is blocked and still receiving valid instructions,
// make sure to store them in the skid buffer.
- if (fromDecode->insts[0] != NULL) {
+ if (fromDecode->size > 0) {
block();
@@ -273,8 +400,9 @@ SimpleRename<Impl>::tick()
if (!fromIEW->iewInfo.stall &&
!fromCommit->commitInfo.stall &&
- fromCommit->commitInfo.freeROBEntries != 0 &&
- fromIEW->iewInfo.freeIQEntries != 0) {
+ calcFreeROBEntries() > 0 &&
+ calcFreeIQEntries() > 0 &&
+ renameMap->numFreeEntries() > 0) {
// Need to be sure to check all blocking conditions above.
// If they have cleared, then start unblocking.
@@ -344,6 +472,7 @@ SimpleRename<Impl>::rename()
// the rename map and the free list.
if (fromCommit->commitInfo.squash ||
fromCommit->commitInfo.robSquashing) {
+ DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n");
squash();
return;
}
@@ -368,37 +497,38 @@ SimpleRename<Impl>::rename()
// Check the decode queue to see if instructions are available.
// If there are no available instructions to rename, then do nothing.
// Or, if the stage is currently unblocking, then go ahead and run it.
- if (fromDecode->insts[0] == NULL && _status != Unblocking) {
+ if (fromDecode->size == 0 && _status != Unblocking) {
DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n");
// Should I change status to idle?
return;
}
- DynInst *inst;
- unsigned num_inst = 0;
+ ////////////////////////////////////
+ // Actual rename part.
+ ////////////////////////////////////
- bool insts_available = _status == Unblocking ?
- skidBuffer.front().insts[num_inst] != NULL :
- fromDecode->insts[num_inst] != NULL;
+ DynInstPtr inst;
- typename SimpleRenameMap::RenameInfo rename_result;
+ // If we're unblocking, then we may be in the middle of an instruction
+ // group. Subtract off numInst to get the proper number of instructions
+ // left.
+ int insts_available = _status == Unblocking ?
+ skidBuffer.front().size - numInst :
+ fromDecode->size;
- unsigned num_src_regs;
- unsigned num_dest_regs;
+ bool block_this_cycle = false;
// Will have to do a different calculation for the number of free
// entries. Number of free entries recorded on this cycle -
// renameWidth * renameToDecodeDelay
- // Can I avoid a multiply?
- unsigned free_rob_entries =
- fromCommit->commitInfo.freeROBEntries - iewToRenameDelay;
- DPRINTF(Rename, "Rename: ROB has %d free entries.\n",
- free_rob_entries);
- unsigned free_iq_entries =
- fromIEW->iewInfo.freeIQEntries - iewToRenameDelay;
+ int free_rob_entries = calcFreeROBEntries();
+ int free_iq_entries = calcFreeIQEntries();
+ int min_iq_rob = min(free_rob_entries, free_iq_entries);
+
+ unsigned to_iew_index = 0;
// Check if there's any space left.
- if (free_rob_entries == 0 || free_iq_entries == 0) {
+ if (min_iq_rob <= 0) {
DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ "
"entries.\n"
"Rename: ROB has %d free entries.\n"
@@ -410,22 +540,40 @@ SimpleRename<Impl>::rename()
toDecode->renameInfo.stall = true;
return;
- }
+ } else if (min_iq_rob < insts_available) {
+ DPRINTF(Rename, "Rename: Will have to block this cycle. Only "
+ "%i insts can be renamed due to IQ/ROB limits.\n",
+ min_iq_rob);
+
+ insts_available = min_iq_rob;
- unsigned min_iq_rob = min(free_rob_entries, free_iq_entries);
- unsigned num_insts_to_rename = min(min_iq_rob, renameWidth);
+ block_this_cycle = true;
+ }
- while (insts_available &&
- num_inst < num_insts_to_rename) {
+ while (insts_available > 0) {
DPRINTF(Rename, "Rename: Sending instructions to iew.\n");
// Get the next instruction either from the skid buffer or the
// decode queue.
- inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] :
- fromDecode->insts[num_inst];
+ inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
+ fromDecode->insts[numInst];
+
+ if (inst->isSquashed()) {
+ DPRINTF(Rename, "Rename: instruction %i with PC %#x is "
+ "squashed, skipping.\n",
+ inst->seqNum, inst->readPC());
+
+ // Go to the next instruction.
+ ++numInst;
+
+ // Decrement how many instructions are available.
+ --insts_available;
+
+ continue;
+ }
DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n",
- inst, inst->readPC());
+ inst->seqNum, inst->readPC());
// If it's a trap instruction, then it needs to wait here within
// rename until the ROB is empty. Needs a way to detect that the
@@ -438,156 +586,59 @@ SimpleRename<Impl>::rename()
panic("Rename: Serializing instruction encountered.\n");
DPRINTF(Rename, "Rename: Serializing instruction "
"encountered.\n");
- block();
// Change status over to BarrierStall so that other stages know
// what this is blocked on.
_status = BarrierStall;
- // Tell the previous stage to stall.
- toDecode->renameInfo.stall = true;
-
- break;
- }
-
- // Make sure there's enough room in the ROB and the IQ.
- // This doesn't really need to be done dynamically; consider
- // moving outside of this function.
- if (free_rob_entries == 0 || free_iq_entries == 0) {
- DPRINTF(Rename, "Rename: Blocking due to lack of ROB or IQ "
- "entries.\n");
- // Call some sort of function to handle all the setup of being
- // blocked.
- block();
-
- // Not really sure how to schedule an event properly, but an
- // event must be scheduled such that upon freeing a ROB entry,
- // this stage will restart up. Perhaps add in a ptr to an Event
- // within the ROB that will be able to execute that Event
- // if a free register is added to the freelist.
-
- // Tell the previous stage to stall.
- toDecode->renameInfo.stall = true;
+ block_this_cycle = true;
break;
}
- // Temporary variables to hold number of source and destination regs.
- num_src_regs = inst->numSrcRegs();
- num_dest_regs = inst->numDestRegs();
-
// Check here to make sure there are enough destination registers
// to rename to. Otherwise block.
- if (renameMap->numFreeEntries() < num_dest_regs)
+ if (renameMap->numFreeEntries() < inst->numDestRegs())
{
DPRINTF(Rename, "Rename: Blocking due to lack of free "
"physical registers to rename to.\n");
- // Call function to handle blocking.
- block();
-
// Need some sort of event based on a register being freed.
- // Tell the previous stage to stall.
- toDecode->renameInfo.stall = true;
+ block_this_cycle = true;
- // Break out of rename loop.
break;
}
- // Get the architectual register numbers from the source and
- // destination operands, and redirect them to the right register.
- // Will need to mark dependencies though.
- for (int src_idx = 0; src_idx < num_src_regs; src_idx++)
- {
- RegIndex src_reg = inst->srcRegIdx(src_idx);
-
- // Look up the source registers to get the phys. register they've
- // been renamed to, and set the sources to those registers.
- RegIndex renamed_reg = renameMap->lookup(src_reg);
-
- DPRINTF(Rename, "Rename: Looking up arch reg %i, got "
- "physical reg %i.\n", (int)src_reg, (int)renamed_reg);
-
- inst->renameSrcReg(src_idx, renamed_reg);
-
- // Either incorporate it into the info passed back,
- // or make another function call to see if that register is
- // ready or not.
- if (renameMap->isReady(renamed_reg)) {
- DPRINTF(Rename, "Rename: Register is ready.\n");
-
- inst->markSrcRegReady(src_idx);
- }
- }
-
- // Rename the destination registers.
- for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++)
- {
- RegIndex dest_reg = inst->destRegIdx(dest_idx);
-
- // Get the physical register that the destination will be
- // renamed to.
- rename_result = renameMap->rename(dest_reg);
-
- DPRINTF(Rename, "Rename: Renaming arch reg %i to physical "
- "register %i.\n", (int)dest_reg,
- (int)rename_result.first);
-
- // Record the rename information so that a history can be kept.
- RenameHistory hb_entry(inst->seqNum, dest_reg,
- rename_result.first,
- rename_result.second);
-
- historyBuffer.push_front(hb_entry);
-
- DPRINTF(Rename, "Rename: Adding instruction to history buffer, "
- "sequence number %lli.\n", inst->seqNum);
-
- // Tell the instruction to rename the appropriate destination
- // register (dest_idx) to the new physical register
- // (rename_result.first), and record the previous physical
- // register that the same logical register was renamed to
- // (rename_result.second).
- inst->renameDestReg(dest_idx,
- rename_result.first,
- rename_result.second);
- }
-
- // If it's an instruction with no destination registers, then put
- // a placeholder within the history buffer. It might be better
- // to not put it in the history buffer at all (other than branches,
- // which always need at least a place holder), and differentiate
- // between instructions with and without destination registers
- // when getting from commit the instructions that committed.
- if (num_dest_regs == 0) {
- RenameHistory hb_entry(inst->seqNum);
-
- historyBuffer.push_front(hb_entry);
+ renameSrcRegs(inst);
- DPRINTF(Rename, "Rename: Adding placeholder instruction to "
- "history buffer, sequence number %lli.\n",
- inst->seqNum);
- }
+ renameDestRegs(inst);
// Put instruction in rename queue.
- toIEW->insts[num_inst] = inst;
+ toIEW->insts[to_iew_index] = inst;
+ ++(toIEW->size);
// Decrease the number of free ROB and IQ entries.
--free_rob_entries;
--free_iq_entries;
// Increment which instruction we're on.
- ++num_inst;
-
- // Check whether or not there are instructions available.
- // Either need to check within the skid buffer, or the decode
- // queue, depending if this stage is unblocking or not.
- // Hmm, dangerous check. Can touch memory not allocated. Might
- // be better to just do check at beginning of loop. Or better
- // yet actually pass the number of instructions issued.
- insts_available = _status == Unblocking ?
- skidBuffer.front().insts[num_inst] != NULL :
- fromDecode->insts[num_inst] != NULL;
+ ++to_iew_index;
+ ++numInst;
+
+ // Decrement how many instructions are available.
+ --insts_available;
}
+ // Check if there are any instructions left that haven't yet been renamed.
+ // If so then block.
+ if (block_this_cycle) {
+ block();
+
+ toDecode->renameInfo.stall = true;
+ } else {
+ // If we had a successful rename and didn't have to exit early, then
+ // reset numInst so it will refer to the correct instruction on next
+ // run.
+ numInst = 0;
+ }
}
diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc
index c234182f0..cb9720d28 100644
--- a/cpu/beta_cpu/rename_map.cc
+++ b/cpu/beta_cpu/rename_map.cc
@@ -3,12 +3,10 @@
// Todo: Consider making functions inline. Avoid having things that are
// using the zero register or misc registers from adding on the registers
-// to the free list.
-
-SimpleRenameMap::RenameEntry::RenameEntry()
- : physical_reg(0), valid(false)
-{
-}
+// to the free list. Possibly remove the direct communication between
+// this and the freelist. Consider making inline bool functions that
+// determine if the register is a logical int, logical fp, physical int,
+// physical fp, etc.
SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
@@ -35,11 +33,12 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs,
//Create the rename maps, and their scoreboards.
intRenameMap = new RenameEntry[numLogicalIntRegs];
- floatRenameMap = new RenameEntry[numLogicalFloatRegs];
+ floatRenameMap = new RenameEntry[numLogicalRegs];
+ // Should combine this into one scoreboard.
intScoreboard.resize(numPhysicalIntRegs);
- floatScoreboard.resize(numPhysicalFloatRegs);
- miscScoreboard.resize(numMiscRegs);
+ floatScoreboard.resize(numPhysicalRegs);
+ miscScoreboard.resize(numPhysicalRegs + numMiscRegs);
// Initialize the entries in the integer rename map to point to the
// physical registers of the same index, and consider each register
@@ -59,31 +58,50 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs,
intScoreboard[index] = 0;
}
+ int float_reg_idx = numPhysicalIntRegs;
+
// Initialize the entries in the floating point rename map to point to
// the physical registers of the same index, and consider each register
// ready until the first rename occurs.
- for (RegIndex index = 0; index < numLogicalFloatRegs; ++index)
+ // Although the index refers purely to architected registers, because
+ // the floating reg indices come after the integer reg indices, they
+ // may exceed the size of a normal RegIndex (short).
+ for (PhysRegIndex index = numLogicalIntRegs;
+ index < numLogicalRegs; ++index)
+ {
+ floatRenameMap[index].physical_reg = float_reg_idx++;
+ }
+
+ for (RegIndex index = numPhysicalIntRegs;
+ index < numPhysicalIntRegs + numLogicalFloatRegs; ++index)
{
- floatRenameMap[index].physical_reg = index + numPhysicalIntRegs;
floatScoreboard[index] = 1;
}
// Initialize the rest of the physical registers (the ones that don't
// directly map to a logical register) as unready.
- for (PhysRegIndex index = numLogicalFloatRegs;
- index < numPhysicalFloatRegs;
+ for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs;
+ index < numPhysicalRegs;
++index)
{
floatScoreboard[index] = 0;
}
// Initialize the entries in the misc register scoreboard to be ready.
- for (RegIndex index = 0; index < numMiscRegs; ++index)
+ for (RegIndex index = numPhysicalRegs;
+ index < numPhysicalRegs + numMiscRegs; ++index)
{
miscScoreboard[index] = 1;
}
}
+SimpleRenameMap::~SimpleRenameMap()
+{
+ // Delete the rename maps as they were allocated with new.
+ delete [] intRenameMap;
+ delete [] floatRenameMap;
+}
+
void
SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
{
@@ -116,6 +134,8 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// Update the integer rename map.
intRenameMap[arch_reg].physical_reg = renamed_reg;
+ assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
+
// Mark register as not ready.
intScoreboard[renamed_reg] = false;
} else {
@@ -124,7 +144,7 @@ SimpleRenameMap::rename(RegIndex arch_reg)
}
} else if (arch_reg < numLogicalRegs) {
// Subtract off the base offset for floating point registers.
- arch_reg = arch_reg - numLogicalIntRegs;
+// arch_reg = arch_reg - numLogicalIntRegs;
// Record the current physical register that is renamed to the
// requested architected register.
@@ -139,6 +159,9 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// Update the floating point rename map.
floatRenameMap[arch_reg].physical_reg = renamed_reg;
+ assert(renamed_reg < numPhysicalRegs &&
+ renamed_reg >= numPhysicalIntRegs);
+
// Mark register as not ready.
floatScoreboard[renamed_reg] = false;
} else {
@@ -160,6 +183,8 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// so the free list can avoid adding it.
prev_reg = renamed_reg;
+ assert(renamed_reg < numPhysicalRegs + numMiscRegs);
+
miscScoreboard[renamed_reg] = false;
}
@@ -175,7 +200,7 @@ SimpleRenameMap::lookup(RegIndex arch_reg)
return intRenameMap[arch_reg].physical_reg;
} else if (arch_reg < numLogicalRegs) {
// Subtract off the base FP offset.
- arch_reg = arch_reg - numLogicalIntRegs;
+// arch_reg = arch_reg - numLogicalIntRegs;
return floatRenameMap[arch_reg].physical_reg;
} else {
@@ -196,12 +221,12 @@ SimpleRenameMap::isReady(PhysRegIndex phys_reg)
} else if (phys_reg < numPhysicalRegs) {
// Subtract off the base FP offset.
- phys_reg = phys_reg - numPhysicalIntRegs;
+// phys_reg = phys_reg - numPhysicalIntRegs;
return floatScoreboard[phys_reg];
} else {
// Subtract off the misc registers offset.
- phys_reg = phys_reg - numPhysicalRegs;
+// phys_reg = phys_reg - numPhysicalRegs;
return miscScoreboard[phys_reg];
}
@@ -218,13 +243,10 @@ SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
intRenameMap[arch_reg].physical_reg = renamed_reg;
} else {
-// assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs));
-
- // Subtract off the base FP offset.
- arch_reg = arch_reg - numLogicalIntRegs;
+ assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs));
DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
- (int)arch_reg, renamed_reg);
+ (int)arch_reg - numLogicalIntRegs, renamed_reg);
floatRenameMap[arch_reg].physical_reg = renamed_reg;
}
@@ -234,6 +256,8 @@ void
SimpleRenameMap::squash(vector<RegIndex> freed_regs,
vector<UnmapInfo> unmaps)
{
+ panic("Not sure this function should be called.");
+
// Not sure the rename map should be able to access the free list
// like this.
while (!freed_regs.empty()) {
@@ -260,16 +284,18 @@ SimpleRenameMap::markAsReady(PhysRegIndex ready_reg)
(int)ready_reg);
if (ready_reg < numPhysicalIntRegs) {
+ assert(ready_reg >= 0);
+
intScoreboard[ready_reg] = 1;
} else if (ready_reg < numPhysicalRegs) {
// Subtract off the base FP offset.
- ready_reg = ready_reg - numPhysicalIntRegs;
+// ready_reg = ready_reg - numPhysicalIntRegs;
floatScoreboard[ready_reg] = 1;
} else {
//Subtract off the misc registers offset.
- ready_reg = ready_reg - numPhysicalRegs;
+// ready_reg = ready_reg - numPhysicalRegs;
miscScoreboard[ready_reg] = 1;
}
diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh
index 05b52bfb2..e68fa05a8 100644
--- a/cpu/beta_cpu/rename_map.hh
+++ b/cpu/beta_cpu/rename_map.hh
@@ -1,6 +1,5 @@
// Todo: Create destructor.
-// Make it so that there's a proper separation between int and fp. Also
-// have it so that there's a more meaningful name given to the variable
+// Have it so that there's a more meaningful name given to the variable
// that marks the beginning of the FP registers.
#ifndef __RENAME_MAP_HH__
@@ -10,7 +9,6 @@
#include <vector>
#include <utility>
-//Will want to include faults
#include "cpu/beta_cpu/free_list.hh"
using namespace std;
@@ -18,8 +16,6 @@ using namespace std;
class SimpleRenameMap
{
public:
-// typedef typename Impl::RegIndex RegIndex;
-
/**
* Pair of a logical register and a physical register. Tells the
* previous mapping of a logical register to a physical register.
@@ -45,6 +41,9 @@ class SimpleRenameMap
RegIndex _intZeroReg,
RegIndex _floatZeroReg);
+ /** Destructor. */
+ ~SimpleRenameMap();
+
void setFreeList(SimpleFreeList *fl_ptr);
//Tell rename map to get a free physical register for a given
@@ -110,7 +109,9 @@ class SimpleRenameMap
PhysRegIndex physical_reg;
bool valid;
- RenameEntry();
+ RenameEntry()
+ : physical_reg(0), valid(false)
+ { }
};
/** Integer rename map. */
@@ -122,6 +123,8 @@ class SimpleRenameMap
/** Free list interface. */
SimpleFreeList *freeList;
+ // Might want to make all these scoreboards into one large scoreboard.
+
/** Scoreboard of physical integer registers, saying whether or not they
* are ready.
*/
diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh
index 7963d1b01..c921c0619 100644
--- a/cpu/beta_cpu/rob.hh
+++ b/cpu/beta_cpu/rob.hh
@@ -16,24 +16,20 @@ using namespace std;
/**
* ROB class. Uses the instruction list that exists within the CPU to
- * represent the ROB. This class doesn't contain that structure, but instead
- * a pointer to the CPU to get access to the structure. The ROB has a large
- * hand in squashing instructions within the CPU, and is responsible for
- * sending out the squash signal as well as what instruction is to be
- * squashed. The ROB also controls most of the calls to the CPU to delete
- * instructions; the only other call is made in the first stage of the pipe-
- * line, which tells the CPU to delete all instructions not in the ROB.
+ * represent the ROB. This class doesn't contain that list, but instead
+ * a pointer to the CPU to get access to the list. The ROB, in this first
+ * implementation, is largely what drives squashing.
*/
-template<class Impl>
+template <class Impl>
class ROB
{
public:
//Typedefs from the Impl.
typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
- typedef pair<RegIndex, PhysRegIndex> UnmapInfo;
- typedef typename list<DynInst *>::iterator InstIt;
+ typedef pair<RegIndex, PhysRegIndex> UnmapInfo_t;
+ typedef typename list<DynInstPtr>::iterator InstIt_t;
public:
/** ROB constructor.
@@ -56,15 +52,15 @@ class ROB
* @params inst The instruction being inserted into the ROB.
* @todo Remove the parameter once correctness is ensured.
*/
- void insertInst(DynInst *inst);
+ void insertInst(DynInstPtr &inst);
/** Returns pointer to the head instruction within the ROB. There is
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the head of the ROB.
*/
- DynInst *readHeadInst() { return cpu->instList.front(); }
+ DynInstPtr readHeadInst() { return cpu->instList.front(); }
- DynInst *readTailInst() { return (*tail); }
+ DynInstPtr readTailInst() { return (*tail); }
void retireHead();
@@ -108,15 +104,28 @@ class ROB
/** Pointer to the CPU. */
FullCPU *cpu;
+ /** Total number of entries in the ROB (its capacity). */
unsigned numEntries;
/** Number of instructions that can be squashed in a single cycle. */
unsigned squashWidth;
- InstIt tail;
-
- InstIt squashIt;
+ /** Iterator pointing to the instruction which is the last instruction
+ * in the ROB. This may at times be invalid (i.e. when the ROB is empty),
+ * however it should never be incorrect.
+ */
+ InstIt_t tail;
+
+ /** Iterator used for walking through the list of instructions when
+ * squashing. Used so that there is persistent state between cycles;
+ * when squashing, the instructions are marked as squashed but not
+ * immediately removed, meaning the tail iterator remains the same before
+ * and after a squash.
+ * This will always be set to cpu->instList.end() if it is invalid.
+ */
+ InstIt_t squashIt;
+ /** Number of instructions in the ROB. */
int numInstsInROB;
/** The sequence number of the squashed instruction. */
diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh
index 308a8010f..862008429 100644
--- a/cpu/beta_cpu/rob_impl.hh
+++ b/cpu/beta_cpu/rob_impl.hh
@@ -3,7 +3,7 @@
#include "cpu/beta_cpu/rob.hh"
-template<class Impl>
+template <class Impl>
ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth)
: numEntries(_numEntries),
squashWidth(_squashWidth),
@@ -13,43 +13,60 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth)
doneSquashing = true;
}
-template<class Impl>
+template <class Impl>
void
ROB<Impl>::setCPU(FullCPU *cpu_ptr)
{
cpu = cpu_ptr;
+ // Set the tail to the beginning of the CPU instruction list so that
+ // upon the first instruction being inserted into the ROB, the tail
+ // iterator can simply be incremented.
tail = cpu->instList.begin();
+ // Set the squash iterator to the end of the instruction list.
squashIt = cpu->instList.end();
}
-template<class Impl>
+template <class Impl>
int
ROB<Impl>::countInsts()
{
-/*
- int return_val = 0;
+ // Start at 1; if the tail matches cpu->instList.begin(), then there is
+ // one inst in the ROB.
+ int return_val = 1;
+
+ // There are quite a few special cases. Do not use this function other
+ // than for debugging purposes.
+ if (cpu->instList.begin() == cpu->instList.end()) {
+ // In this case there are no instructions in the list. The ROB
+ // must be empty.
+ return 0;
+ } else if (tail == cpu->instList.end()) {
+ // In this case, the tail is not yet pointing to anything valid.
+ // The ROB must be empty.
+ return 0;
+ }
// Iterate through the ROB from the head to the tail, counting the
// entries.
- for (InstIt i = cpu->instList.begin(); i != tail; i++)
+ for (InstIt_t i = cpu->instList.begin(); i != tail; ++i)
{
assert(i != cpu->instList.end());
- return_val++;
+ ++return_val;
}
return return_val;
-*/
+
// Because the head won't be tracked properly until the ROB gets the
// first instruction, and any time that the ROB is empty and has not
// yet gotten the instruction, this function doesn't work.
- return numInstsInROB;
+// return numInstsInROB;
}
-template<class Impl>
+template <class Impl>
void
-ROB<Impl>::insertInst(DynInst *inst)
+ROB<Impl>::insertInst(DynInstPtr &inst)
{
// Make sure we have the right number of instructions.
assert(numInstsInROB == countInsts());
@@ -68,7 +85,7 @@ ROB<Impl>::insertInst(DynInst *inst)
// in which case the tail will be pointing at instList.end(). If that
// happens, then reset the tail to the beginning of the list.
if (tail != cpu->instList.end()) {
- tail++;
+ ++tail;
} else {
tail = cpu->instList.begin();
}
@@ -83,13 +100,14 @@ ROB<Impl>::insertInst(DynInst *inst)
// Whatever calls this function needs to ensure that it properly frees up
// registers prior to this function.
-template<class Impl>
+template <class Impl>
void
ROB<Impl>::retireHead()
{
assert(numInstsInROB == countInsts());
+ assert(numInstsInROB > 0);
- DynInst *head_inst;
+ DynInstPtr head_inst;
// Get the head ROB instruction.
head_inst = cpu->instList.front();
@@ -116,12 +134,12 @@ ROB<Impl>::retireHead()
}
}
-template<class Impl>
+template <class Impl>
bool
ROB<Impl>::isHeadReady()
{
if (numInstsInROB != 0) {
- DynInst *head_inst = cpu->instList.front();
+ DynInstPtr head_inst = cpu->instList.front();
return head_inst->readyToCommit();
}
@@ -129,7 +147,7 @@ ROB<Impl>::isHeadReady()
return false;
}
-template<class Impl>
+template <class Impl>
unsigned
ROB<Impl>::numFreeEntries()
{
@@ -138,7 +156,7 @@ ROB<Impl>::numFreeEntries()
return numEntries - numInstsInROB;
}
-template<class Impl>
+template <class Impl>
void
ROB<Impl>::doSquash()
{
@@ -162,6 +180,12 @@ ROB<Impl>::doSquash()
(*squashIt)->setCanCommit();
+ // Special case for when squashing due to a syscall. It's possible
+ // that the squash happened after the head instruction was already
+ // committed, meaning that (*squashIt)->seqNum != squashedSeqNum
+ // will never be false. Normally the squash would never be able
+ // to go past the head of the ROB; in this case it might, so it
+ // must be handled otherwise it will segfault.
#ifndef FULL_SYSTEM
if (squashIt == cpu->instList.begin()) {
DPRINTF(ROB, "ROB: Reached head of instruction list while "
@@ -190,7 +214,7 @@ ROB<Impl>::doSquash()
}
}
-template<class Impl>
+template <class Impl>
void
ROB<Impl>::squash(InstSeqNum squash_num)
{
@@ -206,41 +230,41 @@ ROB<Impl>::squash(InstSeqNum squash_num)
doSquash();
}
-template<class Impl>
+template <class Impl>
uint64_t
ROB<Impl>::readHeadPC()
{
assert(numInstsInROB == countInsts());
- DynInst *head_inst = cpu->instList.front();
+ DynInstPtr head_inst = cpu->instList.front();
return head_inst->readPC();
}
-template<class Impl>
+template <class Impl>
uint64_t
ROB<Impl>::readHeadNextPC()
{
assert(numInstsInROB == countInsts());
- DynInst *head_inst = cpu->instList.front();
+ DynInstPtr head_inst = cpu->instList.front();
return head_inst->readNextPC();
}
-template<class Impl>
+template <class Impl>
InstSeqNum
ROB<Impl>::readHeadSeqNum()
{
// Return the last sequence number that has not been squashed. Other
// stages can use it to squash any instructions younger than the current
// tail.
- DynInst *head_inst = cpu->instList.front();
+ DynInstPtr head_inst = cpu->instList.front();
return head_inst->seqNum;
}
-template<class Impl>
+template <class Impl>
uint64_t
ROB<Impl>::readTailPC()
{
@@ -251,7 +275,7 @@ ROB<Impl>::readTailPC()
return (*tail)->readPC();
}
-template<class Impl>
+template <class Impl>
InstSeqNum
ROB<Impl>::readTailSeqNum()
{
diff --git a/cpu/beta_cpu/store_set.cc b/cpu/beta_cpu/store_set.cc
new file mode 100644
index 000000000..46d763d37
--- /dev/null
+++ b/cpu/beta_cpu/store_set.cc
@@ -0,0 +1,192 @@
+#include "cpu/beta_cpu/store_set.hh"
+#include "base/trace.hh"
+
+// Builds an empty store set predictor.
+//
+// _SSIT_size: number of SSIT entries. calcIndex() masks the PC with
+//             (_SSIT_size - 1), so the whole table is only reachable when
+//             this is a power of two.
+// _LFST_size: number of LFST entries; calcSSID() reduces modulo this
+//             value, so it is also the number of distinct SSIDs.
+StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
+    : SSIT_size(_SSIT_size), LFST_size(_LFST_size)
+{
+    DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
+
+    SSIT = new SSID[SSIT_size];
+
+    validSSIT.resize(SSIT_size);
+
+    // Give every entry a defined value, not just a cleared valid bit, so
+    // that no table slot ever holds an indeterminate value.
+    for (int i = 0; i < SSIT_size; ++i) {
+        SSIT[i] = 0;
+        validSSIT[i] = false;
+    }
+
+    LFST = new InstSeqNum[LFST_size];
+
+    validLFST.resize(LFST_size);
+
+    SSCounters = new int[LFST_size];
+
+    for (int i = 0; i < LFST_size; ++i) {
+        LFST[i] = 0;
+        validLFST[i] = false;
+        SSCounters[i] = 0;
+    }
+
+    // Used by calcIndex(); see the power-of-two note above.
+    index_mask = SSIT_size - 1;
+
+    // Number of low PC bits calcIndex() discards. Hardcoded for now
+    // (presumably chosen for 4-byte instructions -- TODO confirm).
+    offset_bits = 2;
+}
+
+// Trains the predictor after a memory order violation between the load
+// at load_PC and the store at store_PC by placing both PCs in the same
+// store set.
+void
+StoreSet::violation(Addr load_PC, Addr store_PC)
+{
+    const int ld_idx = calcIndex(load_PC);
+    const int st_idx = calcIndex(store_PC);
+
+    const bool ld_known = validSSIT[ld_idx];
+    const bool st_known = validSSIT[st_idx];
+
+    if (ld_known && st_known) {
+        // Both already belong to a set: the one in the smaller set (ties
+        // go against the load) is moved into the other's set.
+        const SSID ld_set = SSIT[ld_idx];
+        const SSID st_set = SSIT[st_idx];
+
+        const int ld_set_size = SSCounters[ld_set];
+        const int st_set_size = SSCounters[st_set];
+
+        if (ld_set_size > st_set_size) {
+            SSIT[st_idx] = ld_set;
+
+            ++SSCounters[ld_set];
+            --SSCounters[st_set];
+        } else {
+            // NOTE(review): the counters are adjusted here although it is
+            // the load that changes sets, unlike the store-only case
+            // below. Confirm which behavior is intended.
+            SSIT[ld_idx] = st_set;
+
+            ++SSCounters[st_set];
+            --SSCounters[ld_set];
+        }
+
+        return;
+    }
+
+    if (st_known) {
+        // Only the store has a set: the load simply points at it. The
+        // set's size is left alone because no store was added.
+        validSSIT[ld_idx] = true;
+        SSIT[ld_idx] = SSIT[st_idx];
+
+        return;
+    }
+
+    // The store has no set yet. It joins the load's set if there is one;
+    // otherwise a new SSID is derived from the load PC and the load is
+    // entered into it as well.
+    const SSID target_set = ld_known ? SSIT[ld_idx] : calcSSID(load_PC);
+
+    if (!ld_known) {
+        validSSIT[ld_idx] = true;
+        SSIT[ld_idx] = target_set;
+    }
+
+    validSSIT[st_idx] = true;
+    SSIT[st_idx] = target_set;
+
+    // One more store now belongs to the target set.
+    ++SSCounters[target_set];
+}
+
+// Hook for a newly inserted load. This predictor keeps no per-load
+// state, so both arguments are ignored.
+void
+StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num)
+{
+    // Intentionally empty.
+}
+
+// Records store_seq_num as the last fetched store of the store set that
+// store_PC maps to. Stores without a valid SSIT entry are ignored.
+void
+StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num)
+{
+    int index = calcIndex(store_PC);
+
+    if (!validSSIT[index]) {
+        // Do nothing if there's no valid entry.
+        return;
+    }
+
+    SSID store_SSID = SSIT[index];
+
+    assert(store_SSID < static_cast<SSID>(LFST_size));
+
+    // Update the last store that was fetched with the current one.
+    LFST[store_SSID] = store_seq_num;
+
+    // Mark the entry live. checkInst() only reports entries whose valid
+    // bit is set, so without this the recorded store is never seen.
+    validLFST[store_SSID] = true;
+}
+
+// Returns the sequence number recorded in the LFST for the store set
+// that PC maps to, or 0 when PC has no valid SSIT entry or the LFST
+// entry is not valid.
+InstSeqNum
+StoreSet::checkInst(Addr PC)
+{
+    const int ssit_idx = calcIndex(PC);
+
+    // No store set is recorded for this PC.
+    if (!validSSIT[ssit_idx])
+        return 0;
+
+    const int set_id = SSIT[ssit_idx];
+
+    assert(set_id < LFST_size);
+
+    // Only a valid LFST entry carries a meaningful sequence number.
+    return validLFST[set_id] ? LFST[set_id] : 0;
+}
+
+// Notifies the predictor that an instruction has issued. If it is a
+// store and it is still the last fetched store of its store set, the
+// LFST entry is invalidated. Loads are ignored.
+void
+StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
+{
+    // This is only updated when a store is issued.
+    if (!is_store) {
+        return;
+    }
+
+    int index = calcIndex(issued_PC);
+
+    // The store may never have been assigned a store set (insertStore()
+    // skips such stores), or clear() may have dropped its entry since it
+    // was inserted. Either way there is nothing to update.
+    if (!validSSIT[index]) {
+        return;
+    }
+
+    SSID store_SSID = SSIT[index];
+
+    assert(store_SSID < static_cast<SSID>(LFST_size));
+
+    // If the last fetched store in the store set refers to the store that
+    // was just issued, then invalidate the entry.
+    if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) {
+        validLFST[store_SSID] = false;
+    }
+}
+
+// Drops LFST entries that refer to squashed stores.
+//
+// squashed_num: sequence number the squash stops at. Instructions with a
+//               larger sequence number are taken to be the younger ones
+//               that were removed -- NOTE(review): confirm against the
+//               caller.
+void
+StoreSet::squash(InstSeqNum squashed_num)
+{
+    // Simple linear scan over the whole LFST.
+    for (int i = 0; i < LFST_size; ++i) {
+        // Only live entries are examined, and only those younger than the
+        // squash point are dropped. Older stores are still in flight and
+        // must stay visible to checkInst().
+        if (validLFST[i] && LFST[i] > squashed_num) {
+            validLFST[i] = false;
+        }
+    }
+}
+
+// Resets the predictor: every SSIT and LFST entry is invalidated and the
+// per-set store counters return to zero.
+void
+StoreSet::clear()
+{
+    for (int i = 0; i < SSIT_size; ++i) {
+        validSSIT[i] = false;
+    }
+
+    for (int i = 0; i < LFST_size; ++i) {
+        validLFST[i] = false;
+
+        // No SSIT entry maps to any set any more, so the sizes that
+        // violation() compares must start again from zero.
+        SSCounters[i] = 0;
+    }
+}
+
diff --git a/cpu/beta_cpu/store_set.hh b/cpu/beta_cpu/store_set.hh
new file mode 100644
index 000000000..701c60a2d
--- /dev/null
+++ b/cpu/beta_cpu/store_set.hh
@@ -0,0 +1,58 @@
+#ifndef __STORE_SET_HH__
+#define __STORE_SET_HH__
+
+#include <vector>
+
+#include "arch/alpha/isa_traits.hh"
+#include "cpu/inst_seq.hh"
+
+// Store set predictor tables. PCs are mapped through the SSIT to a store
+// set ID (SSID); the LFST records, per SSID, the sequence number of the
+// last store inserted for that set.
+class StoreSet
+{
+  public:
+    // Store set identifier; indexes LFST, validLFST and SSCounters.
+    typedef unsigned SSID;
+
+  public:
+    // Allocates both tables with the given entry counts. SSIT_size is
+    // used as (SSIT_size - 1) for masking, so a power of two is expected.
+    StoreSet(int SSIT_size, int LFST_size);
+
+    // Releases the tables allocated by the constructor.
+    ~StoreSet()
+    {
+        delete [] SSIT;
+        delete [] LFST;
+        delete [] SSCounters;
+    }
+
+    // Places the load and the store of a violation in the same store set.
+    void violation(Addr load_PC, Addr store_PC);
+
+    // Currently a no-op.
+    void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
+
+    // Records the store as the last fetched store of its set, if it has
+    // a valid SSIT entry.
+    void insertStore(Addr store_PC, InstSeqNum store_seq_num);
+
+    // Returns the LFST sequence number for PC's store set, or 0 if none.
+    InstSeqNum checkInst(Addr PC);
+
+    // Invalidates the LFST entry of an issued store if it still refers to
+    // that store.
+    void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
+
+    // Invalidates LFST entries affected by a squash.
+    void squash(InstSeqNum squashed_num);
+
+    // Invalidates every SSIT and LFST entry.
+    void clear();
+
+  private:
+    // Not copyable: a copy would share the raw tables with the original
+    // and free them twice. Declared but intentionally left undefined.
+    StoreSet(const StoreSet &);
+    StoreSet &operator=(const StoreSet &);
+
+    // SSIT index for a PC: drop the low offset_bits, then mask.
+    inline int calcIndex(Addr PC)
+    { return (PC >> offset_bits) & index_mask; }
+
+    // Derives an SSID in [0, LFST_size) by hashing the PC.
+    inline SSID calcSSID(Addr PC)
+    { return ((PC ^ (PC >> 10)) % LFST_size); }
+
+    // Store set ID table, indexed by calcIndex(PC).
+    SSID *SSIT;
+
+    // Valid bit for each SSIT entry.
+    std::vector<bool> validSSIT;
+
+    // Last fetched store table, indexed by SSID.
+    InstSeqNum *LFST;
+
+    // Valid bit for each LFST entry.
+    std::vector<bool> validLFST;
+
+    // Per-SSID counter maintained by violation() to compare set sizes.
+    int *SSCounters;
+
+    // Number of SSIT entries.
+    int SSIT_size;
+
+    // Number of LFST entries (and of SSCounters entries).
+    int LFST_size;
+
+    // SSIT_size - 1; applied by calcIndex().
+    int index_mask;
+
+    // HACK: Hardcoded for now.
+    // Number of low PC bits calcIndex() shifts out.
+    int offset_bits;
+};
+
+#endif // __STORE_SET_HH__