summaryrefslogtreecommitdiff
path: root/cpu
diff options
context:
space:
mode:
authorKevin Lim <ktlim@umich.edu>2005-05-19 01:28:25 -0400
committerKevin Lim <ktlim@umich.edu>2005-05-19 01:28:25 -0400
commitc2fcac7c0dd8dff182cb262bdf35d5c67117aa42 (patch)
treefc8804bfbe1aa820c8afa446622b9ec8c658b75e /cpu
parente5721ce6777726fa54aee49be414233656bd98d1 (diff)
downloadgem5-c2fcac7c0dd8dff182cb262bdf35d5c67117aa42.tar.xz
Fix up code for initial release. The main bug that remains is properly forwarding data from stores to loads, specifically when they are of differing sizes.
cpu/base_dyn_inst.cc: Remove unused commented out code. cpu/base_dyn_inst.hh: Fix up comments. cpu/beta_cpu/2bit_local_pred.cc: Reorder code to match header file. cpu/beta_cpu/2bit_local_pred.hh: Update comments. cpu/beta_cpu/alpha_dyn_inst.hh: Remove useless comments. cpu/beta_cpu/alpha_dyn_inst_impl.hh: cpu/beta_cpu/alpha_full_cpu_impl.hh: cpu/beta_cpu/comm.hh: cpu/beta_cpu/iew_impl.hh: Remove unused commented code. cpu/beta_cpu/alpha_full_cpu.hh: Remove obsolete comment. cpu/beta_cpu/alpha_impl.hh: cpu/beta_cpu/full_cpu.hh: Alphabetize includes. cpu/beta_cpu/bpred_unit.hh: Remove unused global history code. cpu/beta_cpu/btb.hh: cpu/beta_cpu/free_list.hh: Use full path in #defines. cpu/beta_cpu/commit.hh: cpu/beta_cpu/decode.hh: Reorder functions. cpu/beta_cpu/commit_impl.hh: Remove obsolete commented code. cpu/beta_cpu/fetch.hh: Remove obsolete comments. cpu/beta_cpu/fetch_impl.hh: cpu/beta_cpu/rename_impl.hh: Remove commented code. cpu/beta_cpu/full_cpu.cc: Remove useless defines. cpu/beta_cpu/inst_queue.hh: Use full path for #defines. cpu/beta_cpu/inst_queue_impl.hh: Reorder functions to match header file. cpu/beta_cpu/mem_dep_unit.hh: Use full path name for #defines. cpu/beta_cpu/ras.hh: Use full path names for #defines. Remove mod operation. cpu/beta_cpu/regfile.hh: Remove unused commented code, fix up current comments. cpu/beta_cpu/tournament_pred.cc: cpu/beta_cpu/tournament_pred.hh: Update programming style. --HG-- extra : convert_revision : fb9d18a853f58a1108ff827e3c123d5b52a0608a
Diffstat (limited to 'cpu')
-rw-r--r--cpu/base_dyn_inst.cc29
-rw-r--r--cpu/base_dyn_inst.hh49
-rw-r--r--cpu/beta_cpu/2bit_local_pred.cc34
-rw-r--r--cpu/beta_cpu/2bit_local_pred.hh4
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst.hh3
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst_impl.hh1
-rw-r--r--cpu/beta_cpu/alpha_full_cpu.hh3
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_impl.hh11
-rw-r--r--cpu/beta_cpu/alpha_impl.hh2
-rw-r--r--cpu/beta_cpu/bpred_unit.hh11
-rw-r--r--cpu/beta_cpu/btb.hh6
-rw-r--r--cpu/beta_cpu/comm.hh9
-rw-r--r--cpu/beta_cpu/commit.hh13
-rw-r--r--cpu/beta_cpu/commit_impl.hh31
-rw-r--r--cpu/beta_cpu/decode.hh9
-rw-r--r--cpu/beta_cpu/fetch.hh61
-rw-r--r--cpu/beta_cpu/fetch_impl.hh4
-rw-r--r--cpu/beta_cpu/free_list.hh22
-rw-r--r--cpu/beta_cpu/full_cpu.cc5
-rw-r--r--cpu/beta_cpu/full_cpu.hh13
-rw-r--r--cpu/beta_cpu/iew_impl.hh22
-rw-r--r--cpu/beta_cpu/inst_queue.hh40
-rw-r--r--cpu/beta_cpu/inst_queue_impl.hh240
-rw-r--r--cpu/beta_cpu/mem_dep_unit.hh25
-rw-r--r--cpu/beta_cpu/ras.hh8
-rw-r--r--cpu/beta_cpu/regfile.hh17
-rw-r--r--cpu/beta_cpu/rename_impl.hh20
-rw-r--r--cpu/beta_cpu/tournament_pred.cc148
-rw-r--r--cpu/beta_cpu/tournament_pred.hh30
29 files changed, 376 insertions, 494 deletions
diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc
index ecfe5a4b0..af172f5b0 100644
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@@ -63,11 +63,6 @@ typedef m5::hash_map<const BaseDynInst *, const BaseDynInst *, MyHashFunc> my_ha
my_hash_t thishash;
#endif
-/** This may need to be specific to an implementation. */
-//int BaseDynInst<Impl>::instcount = 0;
-
-//int break_inst = -1;
-
template <class Impl>
BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC,
Addr pred_PC, InstSeqNum seq_num,
@@ -129,32 +124,12 @@ BaseDynInst<Impl>::initVars()
template <class Impl>
BaseDynInst<Impl>::~BaseDynInst()
{
-/*
- if (specMemWrite) {
- // Remove effects of this instruction from speculative memory
- xc->spec_mem->erase(effAddr);
- }
-*/
--instcount;
DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n",
instcount);
}
-/*
-template <class Impl>
-FunctionalMemory *
-BaseDynInst<Impl>::getMemory(void)
-{
- return xc->mem;
-}
template <class Impl>
-IntReg *
-BaseDynInst<Impl>::getIntegerRegs(void)
-{
- return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile);
-}
-*/
-template <class Impl>
void
BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
{
@@ -369,8 +344,6 @@ BaseDynInst<Impl>::eaSrcsReady()
// EA calc depends on. (i.e. src reg 0 is the source of the data to be
// stored)
-// StaticInstPtr<ISA> eaInst = staticInst->eaCompInst();
-
for (int i = 1; i < numSrcRegs(); ++i)
{
if (!_readySrcRegIdx[i])
@@ -380,7 +353,7 @@ BaseDynInst<Impl>::eaSrcsReady()
return true;
}
-// Forward declaration...
+// Explicit template instantiation
template class BaseDynInst<AlphaSimpleImpl>;
template <>
diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh
index 509874fad..0c9b43adc 100644
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@@ -78,6 +78,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs
};
+ /** The static inst used by this dyn inst. */
StaticInstPtr<ISA> staticInst;
////////////////////////////////////////////
@@ -99,7 +100,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
- // Probably should be private...
+ /** @todo: Consider making this private. */
public:
/** Is this instruction valid. */
bool valid;
@@ -219,6 +220,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
~BaseDynInst();
private:
+ /** Function to initialize variables in the constructors. */
void initVars();
public:
@@ -244,9 +246,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
*/
bool doneTargCalc() { return false; }
- /** Returns the calculated target of the branch. */
-// Addr readCalcTarg() { return nextPC; }
-
+ /** Returns the next PC. This could be the speculative next PC if it is
+ * called prior to the actual branch target being calculated.
+ */
Addr readNextPC() { return nextPC; }
/** Set the predicted target of this current instruction. */
@@ -294,7 +296,10 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Returns the branch target address. */
Addr branchTarget() const { return staticInst->branchTarget(PC); }
+ /** Number of source registers. */
int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
+
+ /** Number of destination registers. */
int8_t numDestRegs() const { return staticInst->numDestRegs(); }
// the following are used to track physical register usage
@@ -314,8 +319,13 @@ class BaseDynInst : public FastAlloc, public RefCounted
return staticInst->srcRegIdx(i);
}
+ /** Returns the result of an integer instruction. */
uint64_t readIntResult() { return instResult.integer; }
+
+ /** Returns the result of a floating point instruction. */
float readFloatResult() { return instResult.fp; }
+
+ /** Returns the result of a floating point (double) instruction. */
double readDoubleResult() { return instResult.dbl; }
//Push to .cc file.
@@ -328,6 +338,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
}
}
+ /** Marks a specific register as ready.
+ * @todo: Move this to .cc file.
+ */
void markSrcRegReady(RegIndex src_idx)
{
++readyRegs;
@@ -339,13 +352,16 @@ class BaseDynInst : public FastAlloc, public RefCounted
}
}
+ /** Returns if a source register is ready. */
bool isReadySrcRegIdx(int idx) const
{
return this->_readySrcRegIdx[idx];
}
+ /** Sets this instruction as completed. */
void setCompleted() { completed = true; }
+ /** Returns whether or not this instruction is completed. */
bool isCompleted() const { return completed; }
/** Sets this instruction as ready to issue. */
@@ -393,20 +409,39 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Set the next PC of this instruction (its actual target). */
void setNextPC(uint64_t val) { nextPC = val; }
+ /** Returns the exec context.
+ * @todo: Remove this once the ExecContext is no longer used.
+ */
ExecContext *xcBase() { return xc; }
private:
+ /** Instruction effective address.
+ * @todo: Consider if this is necessary or not.
+ */
Addr instEffAddr;
+ /** Whether or not the effective address calculation is completed.
+ * @todo: Consider if this is necessary or not.
+ */
bool eaCalcDone;
public:
+ /** Sets the effective address. */
void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
+
+ /** Returns the effective address. */
const Addr &getEA() const { return instEffAddr; }
+
+ /** Returns whether or not the eff. addr. calculation has been completed. */
bool doneEACalc() { return eaCalcDone; }
+
+ /** Returns whether or not the eff. addr. source registers are ready. */
bool eaSrcsReady();
public:
+ /** Load queue index. */
int16_t lqIdx;
+
+ /** Store queue index. */
int16_t sqIdx;
};
@@ -439,8 +474,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
if (fault == No_Fault) {
fault = cpu->read(req, data, lqIdx);
- }
- else {
+ } else {
// Return a fixed value to keep simulation deterministic even
// along misspeculated paths.
data = (T)-1;
@@ -464,9 +498,6 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
traceData->setData(data);
}
-// storeSize = sizeof(T);
-// storeData = data;
-
MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);
req->asid = asid;
diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc
index e5bf9647f..bcd3ba00d 100644
--- a/cpu/beta_cpu/2bit_local_pred.cc
+++ b/cpu/beta_cpu/2bit_local_pred.cc
@@ -30,21 +30,6 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
instShiftAmt);
}
-inline
-bool
-DefaultBP::getPrediction(uint8_t &count)
-{
- // Get the MSB of the count
- return (count >> (localCtrBits - 1));
-}
-
-inline
-unsigned
-DefaultBP::getLocalIndex(Addr &branch_addr)
-{
- return (branch_addr >> instShiftAmt) & indexMask;
-}
-
bool
DefaultBP::lookup(Addr &branch_addr)
{
@@ -91,15 +76,26 @@ DefaultBP::update(Addr &branch_addr, bool taken)
assert(local_predictor_idx < localPredictorSize);
- // Increment or decrement twice to undo speculative update, then
- // properly update
if (taken) {
DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
localCtrs[local_predictor_idx].increment();
-// localCtrs[local_predictor_idx].increment();
} else {
DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
localCtrs[local_predictor_idx].decrement();
-// localCtrs[local_predictor_idx].decrement();
}
}
+
+inline
+bool
+DefaultBP::getPrediction(uint8_t &count)
+{
+ // Get the MSB of the count
+ return (count >> (localCtrBits - 1));
+}
+
+inline
+unsigned
+DefaultBP::getLocalIndex(Addr &branch_addr)
+{
+ return (branch_addr >> instShiftAmt) & indexMask;
+}
diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh
index cda7d3e65..6f9b9eedc 100644
--- a/cpu/beta_cpu/2bit_local_pred.hh
+++ b/cpu/beta_cpu/2bit_local_pred.hh
@@ -31,8 +31,12 @@ class DefaultBP
private:
+ /** Returns the taken/not taken prediction given the value of the
+ * counter.
+ */
inline bool getPrediction(uint8_t &count);
+ /** Calculates the local index based on the PC. */
inline unsigned getLocalIndex(Addr &PC);
/** Array of counters that make up the local predictor. */
diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh
index d34fa071c..61770d59d 100644
--- a/cpu/beta_cpu/alpha_dyn_inst.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst.hh
@@ -1,5 +1,3 @@
-//Todo:
-
#ifndef __CPU_BETA_CPU_ALPHA_DYN_INST_HH__
#define __CPU_BETA_CPU_ALPHA_DYN_INST_HH__
@@ -123,6 +121,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
{
return this->cpu->readFloatRegInt(_srcRegIdx[idx]);
}
+
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/
diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
index 3f530e182..25f98fa90 100644
--- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
@@ -130,7 +130,6 @@ void
AlphaDynInst<Impl>::syscall()
{
this->cpu->syscall(this->threadNumber);
-// this->cpu->syscall();
}
#endif
diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh
index 065b2fc4e..01413b414 100644
--- a/cpu/beta_cpu/alpha_full_cpu.hh
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@@ -103,6 +103,9 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
this->regFile.setFpcr(val);
}
+ // Most of the full system code and syscall emulation is not yet
+ // implemented. These functions do show what the final interface will
+ // look like.
#ifdef FULL_SYSTEM
uint64_t *getIpr();
uint64_t readIpr(int idx, Fault &fault);
diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh
index c42e9e362..8132ec859 100644
--- a/cpu/beta_cpu/alpha_full_cpu_impl.hh
+++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh
@@ -71,8 +71,8 @@ AlphaFullCPU<Impl>::syscall(short thread_num)
// Copy over all important state to xc once all the unrolling is done.
copyToXC();
+ // This is hardcoded to thread 0 while the CPU is only single threaded.
this->thread[0]->syscall();
-// this->thread[thread_num]->syscall();
// Copy over all important state back to CPU.
copyFromXC();
@@ -355,15 +355,6 @@ AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow)
// Will have to lookup in rename map to get physical registers, then
// swap.
-/*
- for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
- if (reg_redir[i]) {
- AlphaISA::IntReg temp = regs->intRegFile[i];
- regs->intRegFile[i] = regs->palregs[i];
- regs->palregs[i] = temp;
- }
- }
-*/
}
#endif // FULL_SYSTEM
diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh
index 81a1aba9b..375cb22db 100644
--- a/cpu/beta_cpu/alpha_impl.hh
+++ b/cpu/beta_cpu/alpha_impl.hh
@@ -3,8 +3,8 @@
#include "arch/alpha/isa_traits.hh"
-#include "cpu/beta_cpu/cpu_policy.hh"
#include "cpu/beta_cpu/alpha_params.hh"
+#include "cpu/beta_cpu/cpu_policy.hh"
// Forward declarations.
template <class Impl>
diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh
index 53c7146c5..55fba8dd7 100644
--- a/cpu/beta_cpu/bpred_unit.hh
+++ b/cpu/beta_cpu/bpred_unit.hh
@@ -36,19 +36,16 @@ class TwobitBPredUnit
bool predict(DynInstPtr &inst, Addr &PC);
- void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
- bool actually_taken);
+ void update(const InstSeqNum &done_sn);
void squash(const InstSeqNum &squashed_sn);
- void update(const InstSeqNum &done_sn);
+ void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
+ bool actually_taken);
bool BPLookup(Addr &inst_PC)
{ return BP.lookup(inst_PC); }
- unsigned BPReadGlobalHist()
- { return 0; }
-
bool BTBValid(Addr &inst_PC)
{ return BTB.valid(inst_PC); }
@@ -56,7 +53,7 @@ class TwobitBPredUnit
{ return BTB.lookup(inst_PC); }
// Will want to include global history.
- void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken)
+ void BPUpdate(Addr &inst_PC, bool taken)
{ BP.update(inst_PC, taken); }
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
diff --git a/cpu/beta_cpu/btb.hh b/cpu/beta_cpu/btb.hh
index 81069eabe..0ed128137 100644
--- a/cpu/beta_cpu/btb.hh
+++ b/cpu/beta_cpu/btb.hh
@@ -1,5 +1,5 @@
-#ifndef __BTB_HH__
-#define __BTB_HH__
+#ifndef __CPU_BETA_CPU_BTB_HH__
+#define __CPU_BETA_CPU_BTB_HH__
// For Addr type.
#include "arch/alpha/isa_traits.hh"
@@ -49,4 +49,4 @@ class DefaultBTB
unsigned tagShiftAmt;
};
-#endif // __BTB_HH__
+#endif // __CPU_BETA_CPU_BTB_HH__
diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh
index 18f76d921..fc972491e 100644
--- a/cpu/beta_cpu/comm.hh
+++ b/cpu/beta_cpu/comm.hh
@@ -3,6 +3,7 @@
#include <stdint.h>
#include <vector>
+
#include "arch/alpha/isa_traits.hh"
#include "cpu/inst_seq.hh"
@@ -112,11 +113,6 @@ struct TimeBufStruct {
uint64_t mispredPC;
uint64_t nextPC;
- // Think of better names here.
- // Will need to be a variety of sizes...
- // Maybe make it a vector, that way only need one object.
-// std::vector<PhysRegIndex> freeRegs;
-
bool robSquashing;
// Represents the instruction that has either been retired or
@@ -124,9 +120,8 @@ struct TimeBufStruct {
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
- // Extra bits of information so that the LDSTQ only updates when it
+ // Extra bit of information so that the LDSTQ only updates when it
// needs to.
-// bool commitIsStore;
bool commitIsLoad;
// Communication specifically to the IQ to tell the IQ that it can
diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh
index c04dc8085..df1269b82 100644
--- a/cpu/beta_cpu/commit.hh
+++ b/cpu/beta_cpu/commit.hh
@@ -72,10 +72,6 @@ class SimpleCommit
void commit();
- uint64_t readCommitPC();
-
- void setSquashing() { _status = ROBSquashing; }
-
private:
void commitInsts();
@@ -86,6 +82,12 @@ class SimpleCommit
void markCompletedInsts();
+ public:
+ uint64_t readCommitPC();
+
+ void setSquashing() { _status = ROBSquashing; }
+
+ private:
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
@@ -113,9 +115,6 @@ class SimpleCommit
/** Pointer to FullCPU. */
FullCPU *cpu;
- //Store buffer interface? Will need to move committed stores to the
- //store buffer
-
/** Memory interface. Used for d-cache accesses. */
MemInterface *dcacheInterface;
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh
index 17ede9694..de7ecf57e 100644
--- a/cpu/beta_cpu/commit_impl.hh
+++ b/cpu/beta_cpu/commit_impl.hh
@@ -1,10 +1,3 @@
-// @todo: Bug when something reaches execute, and mispredicts, but is never
-// put into the ROB because the ROB is full. Need rename stage to predict
-// the free ROB entries better.
-
-#ifndef __COMMIT_IMPL_HH__
-#define __COMMIT_IMPL_HH__
-
#include "base/timebuf.hh"
#include "cpu/beta_cpu/commit.hh"
#include "cpu/exetrace.hh"
@@ -274,13 +267,6 @@ SimpleCommit<Impl>::commitInsts()
// time. However, we need to avoid updating any other state
// incorrectly if it's already been squashed.
if (head_inst->isSquashed()) {
- // Hack to avoid the instruction being retired (and deleted) if
- // it hasn't been through the IEW stage yet.
-/*
- if (!head_inst->isExecuted()) {
- break;
- }
-*/
DPRINTF(Commit, "Commit: Retiring squashed instruction from "
"ROB.\n");
@@ -418,21 +404,6 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
++commitCommittedBranches;
}
-#if 0
- // Explicit communication back to the LDSTQ that a load has been committed
- // and can be removed from the LDSTQ. Stores don't need this because
- // the LDSTQ will already have been told that a store has reached the head
- // of the ROB. Consider including communication if it's a store as well
- // to keep things orthagonal.
- if (head_inst->isMemRef()) {
- ++commitCommittedMemRefs;
- if (head_inst->isLoad()) {
- toIEW->commitInfo.commitIsLoad = true;
- ++commitCommittedLoads;
- }
- }
-#endif
-
// Now that the instruction is going to be committed, finalize its
// trace data.
if (head_inst->traceData) {
@@ -501,5 +472,3 @@ SimpleCommit<Impl>::readCommitPC()
{
return rob->readHeadPC();
}
-
-#endif // __COMMIT_IMPL_HH__
diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh
index af2a5ee54..21f6799b7 100644
--- a/cpu/beta_cpu/decode.hh
+++ b/cpu/beta_cpu/decode.hh
@@ -64,9 +64,6 @@ class SimpleDecode
void decode();
- // Might want to make squash a friend function.
- void squash();
-
private:
inline bool fetchInstsValid();
@@ -76,8 +73,11 @@ class SimpleDecode
void squash(DynInstPtr &inst);
- void dumpFetchQueue();
+ public:
+ // Might want to make squash a friend function.
+ void squash();
+ private:
// Interfaces to objects outside of decode.
/** CPU interface. */
FullCPU *cpu;
@@ -113,7 +113,6 @@ class SimpleDecode
/** Skid buffer between fetch and decode. */
std::queue<FetchStruct> skidBuffer;
- private:
//Consider making these unsigned to avoid any confusion.
/** Rename to decode delay, in ticks. */
unsigned renameToDecodeDelay;
diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh
index da22baa9b..c7e72be47 100644
--- a/cpu/beta_cpu/fetch.hh
+++ b/cpu/beta_cpu/fetch.hh
@@ -1,15 +1,9 @@
-// Todo: add in statistics, only get the MachInst and let decode actually
-// decode, think about SMT fetch,
-// fix up branch prediction stuff into one thing,
-// Figure out where to advance time buffer. Add a way to get a
-// stage's current status.
+// Todo: SMT fetch.
+// Add a way to get a stage's current status.
#ifndef __CPU_BETA_CPU_SIMPLE_FETCH_HH__
#define __CPU_BETA_CPU_SIMPLE_FETCH_HH__
-//Will want to include: time buffer, structs, MemInterface, Event,
-//whatever class bzero uses, MemReqPtr
-
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/pc_event.hh"
@@ -57,6 +51,19 @@ class SimpleFetch
bool stalled;
public:
+ class CacheCompletionEvent : public Event
+ {
+ private:
+ SimpleFetch *fetch;
+
+ public:
+ CacheCompletionEvent(SimpleFetch *_fetch);
+
+ virtual void process();
+ virtual const char *description();
+ };
+
+ public:
/** SimpleFetch constructor. */
SimpleFetch(Params &params);
@@ -68,20 +75,9 @@ class SimpleFetch
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
- void tick();
-
- void fetch();
-
void processCacheCompletion();
- // Figure out PC vs next PC and how it should be updated
- void squash(const Addr &new_PC);
-
private:
- inline void doSquash(const Addr &new_PC);
-
- void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
-
/**
* Looks up in the branch predictor to see if the next PC should be
* either next PC+=MachInst or a branch target.
@@ -101,6 +97,18 @@ class SimpleFetch
*/
Fault fetchCacheLine(Addr fetch_PC);
+ inline void doSquash(const Addr &new_PC);
+
+ void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
+
+ public:
+ // Figure out PC vs next PC and how it should be updated
+ void squash(const Addr &new_PC);
+
+ void tick();
+
+ void fetch();
+
// Align an address (typically a PC) to the start of an I-cache block.
// We fold in the PISA 64- to 32-bit conversion here as well.
Addr icacheBlockAlignPC(Addr addr)
@@ -109,21 +117,6 @@ class SimpleFetch
return (addr & ~(cacheBlkMask));
}
- public:
- class CacheCompletionEvent : public Event
- {
- private:
- SimpleFetch *fetch;
-
- public:
- CacheCompletionEvent(SimpleFetch *_fetch);
-
- virtual void process();
- virtual const char *description();
- };
-
-// CacheCompletionEvent cacheCompletionEvent;
-
private:
/** Pointer to the FullCPU. */
FullCPU *cpu;
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
index 0ec4c63a3..7adfecc52 100644
--- a/cpu/beta_cpu/fetch_impl.hh
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -35,8 +35,7 @@ SimpleFetch<Impl>::CacheCompletionEvent::description()
template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
- : //cacheCompletionEvent(this),
- icacheInterface(params.icacheInterface),
+ : icacheInterface(params.icacheInterface),
branchPred(params),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
@@ -254,7 +253,6 @@ SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
// up this stage once the cache miss completes.
if (result != MA_HIT && icacheInterface->doEvents()) {
memReq->completionEvent = new CacheCompletionEvent(this);
-// lastIcacheStall = curTick;
// How does current model work as far as individual
// stages scheduling/unscheduling?
diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh
index e8e75f7ec..d25bc1b78 100644
--- a/cpu/beta_cpu/free_list.hh
+++ b/cpu/beta_cpu/free_list.hh
@@ -1,13 +1,13 @@
-#ifndef __FREE_LIST_HH__
-#define __FREE_LIST_HH__
+#ifndef __CPU_BETA_CPU_FREE_LIST_HH__
+#define __CPU_BETA_CPU_FREE_LIST_HH__
#include <iostream>
#include <queue>
#include "arch/alpha/isa_traits.hh"
-#include "cpu/beta_cpu/comm.hh"
-#include "base/traceflags.hh"
#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/beta_cpu/comm.hh"
/**
* FreeList class that simply holds the list of free integer and floating
@@ -25,8 +25,6 @@
*/
class SimpleFreeList
{
- public:
-
private:
/** The list of free integer registers. */
std::queue<PhysRegIndex> freeIntRegs;
@@ -60,15 +58,15 @@ class SimpleFreeList
unsigned _numLogicalFloatRegs,
unsigned _numPhysicalFloatRegs);
- PhysRegIndex getIntReg();
+ inline PhysRegIndex getIntReg();
- PhysRegIndex getFloatReg();
+ inline PhysRegIndex getFloatReg();
- void addReg(PhysRegIndex freed_reg);
+ inline void addReg(PhysRegIndex freed_reg);
- void addIntReg(PhysRegIndex freed_reg);
+ inline void addIntReg(PhysRegIndex freed_reg);
- void addFloatReg(PhysRegIndex freed_reg);
+ inline void addFloatReg(PhysRegIndex freed_reg);
bool hasFreeIntRegs()
{ return !freeIntRegs.empty(); }
@@ -166,4 +164,4 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
freeFloatRegs.push(freed_reg);
}
-#endif // __FREE_LIST_HH__
+#endif // __CPU_BETA_CPU_FREE_LIST_HH__
diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc
index 3cf5d4aaa..8c2483630 100644
--- a/cpu/beta_cpu/full_cpu.cc
+++ b/cpu/beta_cpu/full_cpu.cc
@@ -1,6 +1,3 @@
-#ifndef __SIMPLE_FULL_CPU_CC__
-#define __SIMPLE_FULL_CPU_CC__
-
#ifdef FULL_SYSTEM
#include "sim/system.hh"
#else
@@ -528,5 +525,3 @@ FullBetaCPU<Impl>::wakeDependents(DynInstPtr &inst)
// Forward declaration of FullBetaCPU.
template class FullBetaCPU<AlphaSimpleImpl>;
-
-#endif // __SIMPLE_FULL_CPU_HH__
diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh
index 85fc49371..a7916f9ae 100644
--- a/cpu/beta_cpu/full_cpu.hh
+++ b/cpu/beta_cpu/full_cpu.hh
@@ -12,13 +12,12 @@
#include <list>
#include <vector>
-#include "cpu/beta_cpu/comm.hh"
-
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/base_cpu.hh"
-#include "cpu/exec_context.hh"
+#include "cpu/beta_cpu/comm.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
+#include "cpu/exec_context.hh"
#include "sim/process.hh"
#ifdef FULL_SYSTEM
@@ -96,15 +95,15 @@ class FullBetaCPU : public BaseFullCPU
}
public:
- void tick();
-
FullBetaCPU(Params &params);
~FullBetaCPU();
- void init();
-
void fullCPURegStats();
+ void tick();
+
+ void init();
+
void activateContext(int thread_num, int delay);
void suspendContext(int thread_num);
void deallocateContext(int thread_num);
diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh
index 086d39320..79dd809ad 100644
--- a/cpu/beta_cpu/iew_impl.hh
+++ b/cpu/beta_cpu/iew_impl.hh
@@ -361,20 +361,7 @@ SimpleIEW<Impl>::dispatchInsts()
} else if (inst->isStore()) {
ldstQueue.insertStore(inst);
- // A bit of a hack. Set that it can commit so that
- // the commit stage will try committing it, and then
- // once commit realizes it's a store it will send back
- // a signal to this stage to issue and execute that
- // store. Change to be a bit that says the instruction
- // has extra work to do at commit.
-// inst->setCanCommit();
-
-// instQueue.insertNonSpec(inst);
-
++iewDispStoreInsts;
-// ++iewDispNonSpecInsts;
-
-// continue;
} else if (inst->isNonSpeculative()) {
DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
"encountered, skipping.\n");
@@ -404,8 +391,6 @@ SimpleIEW<Impl>::dispatchInsts()
DPRINTF(IEW, "IEW: Issue: Executed branch encountered, "
"skipping.\n");
-// assert(inst->isDirectCtrl());
-
inst->setIssued();
inst->setCanCommit();
@@ -614,10 +599,6 @@ SimpleIEW<Impl>::tick()
}
++iewSquashCycles;
-
- // Also should advance its own time buffers if the stage ran.
- // Not sure about this...
-// issueToExecQueue.advance();
} else if (_status == Blocked) {
// Continue to tell previous stage to stall.
toRename->iewInfo.stall = true;
@@ -654,14 +635,11 @@ SimpleIEW<Impl>::tick()
// or store to commit. Also check if it's being told to execute a
// nonspeculative instruction.
// This is pretty inefficient...
-// if (0/*fromCommit->commitInfo.commitIsStore*/) {
if (!fromCommit->commitInfo.squash &&
!fromCommit->commitInfo.robSquashing) {
ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
-// } else if (fromCommit->commitInfo.commitIsLoad) {
ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
}
-// }
if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh
index 120e6b940..b97797101 100644
--- a/cpu/beta_cpu/inst_queue.hh
+++ b/cpu/beta_cpu/inst_queue.hh
@@ -1,5 +1,5 @@
-#ifndef __INST_QUEUE_HH__
-#define __INST_QUEUE_HH__
+#ifndef __CPU_BETA_CPU_INST_QUEUE_HH__
+#define __CPU_BETA_CPU_INST_QUEUE_HH__
#include <list>
#include <map>
@@ -103,19 +103,6 @@ class InstructionQueue
void stopSquash();
- /** Debugging function to dump all the list sizes, as well as print
- * out the list of nonspeculative instructions. Should not be used
- * in any other capacity, but it has no harmful sideaffects.
- */
- void dumpLists();
-
- private:
- /** Debugging function to count how many entries are in the IQ. It does
- * a linear walk through the instructions, so do not call this function
- * during normal execution.
- */
- int countInsts();
-
private:
/** Pointer to the CPU. */
FullCPU *cpu;
@@ -157,9 +144,6 @@ class InstructionQueue
/** List of ready branch instructions. */
ReadyInstQueue readyBranchInsts;
- /** List of ready memory instructions. */
-// ReadyInstQueue readyMemInsts;
-
/** List of ready miscellaneous instructions. */
ReadyInstQueue readyMiscInsts;
@@ -281,10 +265,26 @@ class InstructionQueue
bool addToDependents(DynInstPtr &new_inst);
void insertDependency(DynInstPtr &new_inst);
void createDependency(DynInstPtr &new_inst);
- void dumpDependGraph();
void addIfReady(DynInstPtr &inst);
+ private:
+ /** Debugging function to count how many entries are in the IQ. It does
+ * a linear walk through the instructions, so do not call this function
+ * during normal execution.
+ */
+ int countInsts();
+
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dumpDependGraph();
+
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful side effects.
+ */
+ void dumpLists();
+
Stats::Scalar<> iqInstsAdded;
Stats::Scalar<> iqNonSpecInstsAdded;
// Stats::Scalar<> iqIntInstsAdded;
@@ -305,4 +305,4 @@ class InstructionQueue
};
-#endif //__INST_QUEUE_HH__
+#endif //__CPU_BETA_CPU_INST_QUEUE_HH__
diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh
index d4e3939cf..9f7f13387 100644
--- a/cpu/beta_cpu/inst_queue_impl.hh
+++ b/cpu/beta_cpu/inst_queue_impl.hh
@@ -1,6 +1,3 @@
-#ifndef __INST_QUEUE_IMPL_HH__
-#define __INST_QUEUE_IMPL_HH__
-
// Todo:
// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
// it; either do in reverse order, or have added instructions put into a
@@ -171,6 +168,13 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
fromCommit = timeBuffer->getWire(-commitToIEWDelay);
}
+template <class Impl>
+unsigned
+InstructionQueue<Impl>::numFreeEntries()
+{
+ return freeEntries;
+}
+
// Might want to do something more complex if it knows how many instructions
// will be issued this cycle.
template <class Impl>
@@ -185,13 +189,6 @@ InstructionQueue<Impl>::isFull()
}
template <class Impl>
-unsigned
-InstructionQueue<Impl>::numFreeEntries()
-{
- return freeEntries;
-}
-
-template <class Impl>
void
InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
{
@@ -562,7 +559,6 @@ InstructionQueue<Impl>::scheduleReadyInsts()
break;
case Squashed:
-// issuing_inst = squashed_head_inst;
assert(0 && "Squashed insts should not issue any more!");
squashedInsts.pop();
// Set the squashed instruction as able to commit so that commit
@@ -621,6 +617,77 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
template <class Impl>
void
+InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+ DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
+ //Look at the physical destination register of the DynInst
+ //and look it up on the dependency graph. Then mark as ready
+ //any instructions within the instruction queue.
+ DependencyEntry *curr;
+
+ // Tell the memory dependence unit to wake any dependents on this
+ // instruction if it is a memory instruction.
+
+ if (completed_inst->isMemRef()) {
+ memDepUnit.wakeDependents(completed_inst);
+ }
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < completed_inst->numDestRegs();
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg =
+ completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Special case of uniq or control registers. They are not
+ // handled by the IQ and thus have no dependency graph entry.
+ // @todo Figure out a cleaner way to handle this.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
+ (int) dest_reg);
+
+ //Maybe abstract this part into a function.
+ //Go through the dependency chain, marking the registers as ready
+ //within the waiting instructions.
+ while (dependGraph[dest_reg].next) {
+
+ curr = dependGraph[dest_reg].next;
+
+ DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
+ curr->inst->readPC());
+
+ // Might want to give more information to the instruction
+ // so that it knows which of its source registers is ready.
+ // However that would mean that the dependency graph entries
+ // would need to hold the src_reg_idx.
+ curr->inst->markSrcRegReady();
+
+ addIfReady(curr->inst);
+
+ dependGraph[dest_reg].next = curr->next;
+
+ DependencyEntry::mem_alloc_counter--;
+
+ curr->inst = NULL;
+
+ delete curr;
+ }
+
+ // Reset the head node now that all of its dependents have been woken
+ // up.
+ dependGraph[dest_reg].next = NULL;
+ dependGraph[dest_reg].inst = NULL;
+
+ // Mark the scoreboard as having that register ready.
+ regScoreboard[dest_reg] = true;
+ }
+}
+
+template <class Impl>
+void
InstructionQueue<Impl>::violation(DynInstPtr &store,
DynInstPtr &faulting_load)
{
@@ -747,73 +814,56 @@ InstructionQueue<Impl>::stopSquash()
template <class Impl>
void
-InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
{
- DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
- //Look at the physical destination register of the DynInst
- //and look it up on the dependency graph. Then mark as ready
- //any instructions within the instruction queue.
- DependencyEntry *curr;
-
- // Tell the memory dependence unit to wake any dependents on this
- // instruction if it is a memory instruction.
-
- if (completed_inst->isMemRef()) {
- memDepUnit.wakeDependents(completed_inst);
- }
-
- for (int dest_reg_idx = 0;
- dest_reg_idx < completed_inst->numDestRegs();
- dest_reg_idx++)
- {
- PhysRegIndex dest_reg =
- completed_inst->renamedDestRegIdx(dest_reg_idx);
-
- // Special case of uniq or control registers. They are not
- // handled by the IQ and thus have no dependency graph entry.
- // @todo Figure out a cleaner way to handle this.
- if (dest_reg >= numPhysRegs) {
- continue;
- }
-
- DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
- (int) dest_reg);
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
- //Maybe abstract this part into a function.
- //Go through the dependency chain, marking the registers as ready
- //within the waiting instructions.
- while (dependGraph[dest_reg].next) {
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DependencyEntry *new_entry = new DependencyEntry;
+ new_entry->next = this->next;
+ new_entry->inst = new_inst;
- curr = dependGraph[dest_reg].next;
+ // Then actually add it to the chain.
+ this->next = new_entry;
- DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
- curr->inst->readPC());
+ ++mem_alloc_counter;
+}
- // Might want to give more information to the instruction
- // so that it knows which of its source registers is ready.
- // However that would mean that the dependency graph entries
- // would need to hold the src_reg_idx.
- curr->inst->markSrcRegReady();
+template <class Impl>
+void
+InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
+{
+ DependencyEntry *prev = this;
+ DependencyEntry *curr = this->next;
- addIfReady(curr->inst);
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
- dependGraph[dest_reg].next = curr->next;
+ // Find the instruction to remove within the dependency linked list.
+ while(curr->inst != inst_to_remove)
+ {
+ prev = curr;
+ curr = curr->next;
- DependencyEntry::mem_alloc_counter--;
+ assert(curr != NULL);
+ }
- curr->inst = NULL;
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
- delete curr;
- }
+ --mem_alloc_counter;
- // Reset the head node now that all of its dependents have been woken
- // up.
- dependGraph[dest_reg].next = NULL;
- dependGraph[dest_reg].inst = NULL;
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
- // Mark the scoreboard as having that register ready.
- regScoreboard[dest_reg] = true;
- }
+ delete curr;
}
template <class Impl>
@@ -900,60 +950,6 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
template <class Impl>
void
-InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
-{
- //Add this new, dependent instruction at the head of the dependency
- //chain.
-
- // First create the entry that will be added to the head of the
- // dependency chain.
- DependencyEntry *new_entry = new DependencyEntry;
- new_entry->next = this->next;
- new_entry->inst = new_inst;
-
- // Then actually add it to the chain.
- this->next = new_entry;
-
- ++mem_alloc_counter;
-}
-
-template <class Impl>
-void
-InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
-{
- DependencyEntry *prev = this;
- DependencyEntry *curr = this->next;
-
- // Make sure curr isn't NULL. Because this instruction is being
- // removed from a dependency list, it must have been placed there at
- // an earlier time. The dependency chain should not be empty,
- // unless the instruction dependent upon it is already ready.
- if (curr == NULL) {
- return;
- }
-
- // Find the instruction to remove within the dependency linked list.
- while(curr->inst != inst_to_remove)
- {
- prev = curr;
- curr = curr->next;
-
- assert(curr != NULL);
- }
-
- // Now remove this instruction from the list.
- prev->next = curr->next;
-
- --mem_alloc_counter;
-
- // Could push this off to the destructor of DependencyEntry
- curr->inst = NULL;
-
- delete curr;
-}
-
-template <class Impl>
-void
InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
{
//If the instruction now has all of its source registers
@@ -1090,8 +1086,6 @@ InstructionQueue<Impl>::dumpLists()
cprintf("Ready branch list size: %i\n", readyBranchInsts.size());
-// cprintf("Ready memory list size: %i\n", readyMemInsts.size());
-
cprintf("Ready misc list size: %i\n", readyMiscInsts.size());
cprintf("Squashed list size: %i\n", squashedInsts.size());
@@ -1110,5 +1104,3 @@ InstructionQueue<Impl>::dumpLists()
cprintf("\n");
}
-
-#endif // __INST_QUEUE_IMPL_HH__
diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh
index e43543e09..5da57945f 100644
--- a/cpu/beta_cpu/mem_dep_unit.hh
+++ b/cpu/beta_cpu/mem_dep_unit.hh
@@ -1,12 +1,12 @@
-#ifndef __MEM_DEP_UNIT_HH__
-#define __MEM_DEP_UNIT_HH__
+#ifndef __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
+#define __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
-#include <set>
#include <map>
+#include <set>
-#include "cpu/inst_seq.hh"
#include "base/statistics.hh"
+#include "cpu/inst_seq.hh"
/**
* Memory dependency unit class. This holds the memory dependence predictor.
@@ -34,6 +34,12 @@ class MemDepUnit {
void insertNonSpec(DynInstPtr &inst);
+ // Will want to make this operation relatively fast. Right now it
+ // is somewhat slow.
+ DynInstPtr &top();
+
+ void pop();
+
void regsReady(DynInstPtr &inst);
void nonSpecInstReady(DynInstPtr &inst);
@@ -46,12 +52,6 @@ class MemDepUnit {
void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
- // Will want to make this operation relatively fast. Right now it
- // kind of sucks.
- DynInstPtr &top();
-
- void pop();
-
inline bool empty()
{ return readyInsts.empty(); }
@@ -91,11 +91,8 @@ class MemDepUnit {
}
};
-
- private:
inline void moveToReady(dep_it_t &woken_inst);
- private:
/** List of instructions that have passed through rename, yet are still
* waiting on either a memory dependence to resolve or source registers to
* become available before they can issue.
@@ -137,4 +134,4 @@ class MemDepUnit {
Stats::Scalar<> conflictingStores;
};
-#endif
+#endif // __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
diff --git a/cpu/beta_cpu/ras.hh b/cpu/beta_cpu/ras.hh
index 7666f825f..51dab15e4 100644
--- a/cpu/beta_cpu/ras.hh
+++ b/cpu/beta_cpu/ras.hh
@@ -1,5 +1,5 @@
-#ifndef __RAS_HH__
-#define __RAS_HH__
+#ifndef __CPU_BETA_CPU_RAS_HH__
+#define __CPU_BETA_CPU_RAS_HH__
// For Addr type.
#include "arch/alpha/isa_traits.hh"
@@ -23,7 +23,7 @@ class ReturnAddrStack
private:
inline void incrTos()
- { tos = (tos + 1) % numEntries; }
+ { if (++tos == numEntries) tos = 0; }
inline void decrTos()
{ tos = (tos == 0 ? numEntries - 1 : tos - 1); }
@@ -37,4 +37,4 @@ class ReturnAddrStack
unsigned tos;
};
-#endif // __RAS_HH__
+#endif // __CPU_BETA_CPU_RAS_HH__
diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh
index c9d1b092f..d7664707d 100644
--- a/cpu/beta_cpu/regfile.hh
+++ b/cpu/beta_cpu/regfile.hh
@@ -8,8 +8,8 @@
#include "cpu/beta_cpu/comm.hh"
#ifdef FULL_SYSTEM
-#include "kern/kernel_stats.hh"
#include "arch/alpha/ev5.hh"
+#include "kern/kernel_stats.hh"
using namespace EV5;
#endif
@@ -19,8 +19,6 @@ using namespace EV5;
// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
// and should go in the AlphaFullCPU.
-extern void debug_break();
-
template <class Impl>
class PhysRegFile
{
@@ -203,8 +201,11 @@ class PhysRegFile
/** Miscellaneous register file. */
MiscRegFile miscRegs;
- Addr pc; // program counter
- Addr npc; // next-cycle program counter
+ /** Program counter. */
+ Addr pc;
+
+ /** Next-cycle program counter. */
+ Addr npc;
#ifdef FULL_SYSTEM
private:
@@ -408,7 +409,6 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
// write entire quad w/ no side-effect
old = ipr[idx];
ipr[idx] = val;
-// kernelStats.context(old, val);
break;
case ISA::IPR_DTB_PTE:
@@ -435,14 +435,9 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
// only write least significant five bits - interrupt level
ipr[idx] = val & 0x1f;
-// kernelStats.swpipl(ipr[idx]);
break;
case ISA::IPR_DTB_CM:
-// if (val & 0x18)
-// kernelStats->mode(Kernel::user);
-// else
-// kernelStats->mode(Kernel::kernel);
case ISA::IPR_ICM:
// only write two mode bits - processor mode
diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh
index 5a8e499e9..5ad0d1416 100644
--- a/cpu/beta_cpu/rename_impl.hh
+++ b/cpu/beta_cpu/rename_impl.hh
@@ -507,6 +507,7 @@ SimpleRename<Impl>::tick()
DPRINTF(Rename, "Rename: Done squashing, going to running.\n");
_status = Running;
+ rename();
} else {
doSquash();
}
@@ -523,25 +524,6 @@ SimpleRename<Impl>::tick()
#endif
}
- // Perhaps put this outside of this function, since this will
- // happen regardless of whether or not the stage is blocked or
- // squashing.
- // Read from the time buffer any necessary data.
- // Read registers that are freed, and add them to the freelist.
- // This is unnecessary due to the history buffer (assuming the history
- // buffer works properly).
-/*
- while(!fromCommit->commitInfo.freeRegs.empty())
- {
- PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back();
- DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n",
- (int)freed_reg);
- freeList->addReg(freed_reg);
-
- fromCommit->commitInfo.freeRegs.pop_back();
- }
-*/
-
}
template<class Impl>
diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc
index 5a22278eb..41e34adef 100644
--- a/cpu/beta_cpu/tournament_pred.cc
+++ b/cpu/beta_cpu/tournament_pred.cc
@@ -10,52 +10,52 @@ TournamentBP::TournamentBP(unsigned _local_predictor_size,
unsigned _choice_predictor_size,
unsigned _choice_ctr_bits,
unsigned _instShiftAmt)
- : local_predictor_size(_local_predictor_size),
- local_ctr_bits(_local_ctr_bits),
- local_history_table_size(_local_history_table_size),
- local_history_bits(_local_history_bits),
- global_predictor_size(_global_predictor_size),
- global_ctr_bits(_global_ctr_bits),
- global_history_bits(_global_history_bits),
- choice_predictor_size(_global_predictor_size),
- choice_ctr_bits(_choice_ctr_bits),
+ : localPredictorSize(_local_predictor_size),
+ localCtrBits(_local_ctr_bits),
+ localHistoryTableSize(_local_history_table_size),
+ localHistoryBits(_local_history_bits),
+ globalPredictorSize(_global_predictor_size),
+ globalCtrBits(_global_ctr_bits),
+ globalHistoryBits(_global_history_bits),
+ choicePredictorSize(_global_predictor_size),
+ choiceCtrBits(_choice_ctr_bits),
instShiftAmt(_instShiftAmt)
{
//Should do checks here to make sure sizes are correct (powers of 2)
//Setup the array of counters for the local predictor
- local_ctrs = new SatCounter[local_predictor_size];
+ localCtrs = new SatCounter[localPredictorSize];
- for (int i = 0; i < local_predictor_size; ++i)
- local_ctrs[i].setBits(local_ctr_bits);
+ for (int i = 0; i < localPredictorSize; ++i)
+ localCtrs[i].setBits(localCtrBits);
//Setup the history table for the local table
- local_history_table = new unsigned[local_history_table_size];
+ localHistoryTable = new unsigned[localHistoryTableSize];
- for (int i = 0; i < local_history_table_size; ++i)
- local_history_table[i] = 0;
+ for (int i = 0; i < localHistoryTableSize; ++i)
+ localHistoryTable[i] = 0;
// Setup the local history mask
- localHistoryMask = (1 << local_history_bits) - 1;
+ localHistoryMask = (1 << localHistoryBits) - 1;
//Setup the array of counters for the global predictor
- global_ctrs = new SatCounter[global_predictor_size];
+ globalCtrs = new SatCounter[globalPredictorSize];
- for (int i = 0; i < global_predictor_size; ++i)
- global_ctrs[i].setBits(global_ctr_bits);
+ for (int i = 0; i < globalPredictorSize; ++i)
+ globalCtrs[i].setBits(globalCtrBits);
//Clear the global history
- global_history = 0;
+ globalHistory = 0;
// Setup the global history mask
- globalHistoryMask = (1 << global_history_bits) - 1;
+ globalHistoryMask = (1 << globalHistoryBits) - 1;
//Setup the array of counters for the choice predictor
- choice_ctrs = new SatCounter[choice_predictor_size];
+ choiceCtrs = new SatCounter[choicePredictorSize];
- for (int i = 0; i < choice_predictor_size; ++i)
- choice_ctrs[i].setBits(choice_ctr_bits);
+ for (int i = 0; i < choicePredictorSize; ++i)
+ choiceCtrs[i].setBits(choiceCtrBits);
- threshold = (1 << (local_ctr_bits - 1)) - 1;
+ threshold = (1 << (localCtrBits - 1)) - 1;
threshold = threshold / 2;
}
@@ -63,29 +63,29 @@ inline
unsigned
TournamentBP::calcLocHistIdx(Addr &branch_addr)
{
- return (branch_addr >> instShiftAmt) & (local_history_table_size - 1);
+ return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
}
inline
void
TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
{
- global_history = (global_history << 1) | 1;
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] =
- (local_history_table[local_history_idx] << 1) | 1;
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1) | 1;
}
inline
void
TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
{
- global_history = (global_history << 1);
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] =
- (local_history_table[local_history_idx] << 1);
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1);
}
bool
@@ -100,15 +100,15 @@ TournamentBP::lookup(Addr &branch_addr)
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
- local_predictor_idx = local_history_table[local_history_idx]
+ local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
- local_prediction = local_ctrs[local_predictor_idx].read();
+ local_prediction = localCtrs[local_predictor_idx].read();
//Lookup in the global predictor to get its branch prediction
- global_prediction = global_ctrs[global_history].read();
+ global_prediction = globalCtrs[globalHistory].read();
//Lookup in the choice predictor to see which one to use
- choice_prediction = choice_ctrs[global_history].read();
+ choice_prediction = choiceCtrs[globalHistory].read();
//@todo Put a threshold value in for the three predictors that can
// be set through the constructor (so this isn't hard coded).
@@ -117,21 +117,21 @@ TournamentBP::lookup(Addr &branch_addr)
if (global_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].increment();
- local_ctrs[local_history_idx].increment();
+ globalCtrs[globalHistory].increment();
+ localCtrs[local_history_idx].increment();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].decrement();
- local_ctrs[local_history_idx].decrement();
+ globalCtrs[globalHistory].decrement();
+ localCtrs[local_history_idx].decrement();
return false;
}
@@ -139,21 +139,21 @@ TournamentBP::lookup(Addr &branch_addr)
if (local_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].increment();
- local_ctrs[local_history_idx].increment();
+ globalCtrs[globalHistory].increment();
+ localCtrs[local_history_idx].increment();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].decrement();
- local_ctrs[local_history_idx].decrement();
+ globalCtrs[globalHistory].decrement();
+ localCtrs[local_history_idx].decrement();
return false;
}
@@ -174,20 +174,20 @@ TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
bool global_pred_taken;
// Load the correct global history into the register.
- global_history = correct_gh;
+ globalHistory = correct_gh;
// Get the local predictor's current prediction, remove the incorrect
// update, and update the local predictor
local_history_idx = calcLocHistIdx(branch_addr);
- local_predictor_idx = local_history_table[local_history_idx];
+ local_predictor_idx = localHistoryTable[local_history_idx];
local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
- local_prediction = local_ctrs[local_predictor_idx].read();
+ local_prediction = localCtrs[local_predictor_idx].read();
local_pred_taken = local_prediction > threshold;
//Get the global predictor's current prediction, and update the
//global predictor
- global_prediction = global_ctrs[global_history].read();
+ global_prediction = globalCtrs[globalHistory].read();
global_pred_taken = global_prediction > threshold;
//Update the choice predictor to tell it which one was correct
@@ -195,34 +195,34 @@ TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
//If the local prediction matches the actual outcome, decrement
//the counter. Otherwise increment the counter.
if (local_pred_taken == taken) {
- choice_ctrs[global_history].decrement();
+ choiceCtrs[globalHistory].decrement();
} else {
- choice_ctrs[global_history].increment();
+ choiceCtrs[globalHistory].increment();
}
}
if (taken) {
- assert(global_history < global_predictor_size &&
- local_predictor_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
- local_ctrs[local_predictor_idx].increment();
- global_ctrs[global_history].increment();
+ localCtrs[local_predictor_idx].increment();
+ globalCtrs[globalHistory].increment();
- global_history = (global_history << 1) | 1;
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] |= 1;
+ localHistoryTable[local_history_idx] |= 1;
}
else {
- assert(global_history < global_predictor_size &&
- local_predictor_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
- local_ctrs[local_predictor_idx].decrement();
- global_ctrs[global_history].decrement();
+ localCtrs[local_predictor_idx].decrement();
+ globalCtrs[globalHistory].decrement();
- global_history = (global_history << 1);
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] &= ~1;
+ localHistoryTable[local_history_idx] &= ~1;
}
}
diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh
index 1512abc78..563da6f23 100644
--- a/cpu/beta_cpu/tournament_pred.hh
+++ b/cpu/beta_cpu/tournament_pred.hh
@@ -37,7 +37,7 @@ class TournamentBP
*/
void update(Addr &branch_addr, unsigned global_history, bool taken);
- inline unsigned readGlobalHist() { return global_history; }
+ inline unsigned readGlobalHist() { return globalHistory; }
private:
@@ -50,56 +50,56 @@ class TournamentBP
inline void updateHistoriesNotTaken(unsigned local_history_idx);
/** Local counters. */
- SatCounter *local_ctrs;
+ SatCounter *localCtrs;
/** Size of the local predictor. */
- unsigned local_predictor_size;
+ unsigned localPredictorSize;
/** Number of bits of the local predictor's counters. */
- unsigned local_ctr_bits;
+ unsigned localCtrBits;
/** Array of local history table entries. */
- unsigned *local_history_table;
+ unsigned *localHistoryTable;
/** Size of the local history table. */
- unsigned local_history_table_size;
+ unsigned localHistoryTableSize;
/** Number of bits for each entry of the local history table.
* @todo Doesn't this come from the size of the local predictor?
*/
- unsigned local_history_bits;
+ unsigned localHistoryBits;
/** Mask to get the proper local history. */
unsigned localHistoryMask;
/** Array of counters that make up the global predictor. */
- SatCounter *global_ctrs;
+ SatCounter *globalCtrs;
/** Size of the global predictor. */
- unsigned global_predictor_size;
+ unsigned globalPredictorSize;
/** Number of bits of the global predictor's counters. */
- unsigned global_ctr_bits;
+ unsigned globalCtrBits;
/** Global history register. */
- unsigned global_history;
+ unsigned globalHistory;
/** Number of bits for the global history. */
- unsigned global_history_bits;
+ unsigned globalHistoryBits;
/** Mask to get the proper global history. */
unsigned globalHistoryMask;
/** Array of counters that make up the choice predictor. */
- SatCounter *choice_ctrs;
+ SatCounter *choiceCtrs;
/** Size of the choice predictor (identical to the global predictor). */
- unsigned choice_predictor_size;
+ unsigned choicePredictorSize;
/** Number of bits of the choice predictor's counters. */
- unsigned choice_ctr_bits;
+ unsigned choiceCtrBits;
/** Number of bits to shift the instruction over to get rid of the word
* offset.