summaryrefslogtreecommitdiff
path: root/src/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--src/cpu/o3/2bit_local_pred.cc25
-rw-r--r--src/cpu/o3/2bit_local_pred.hh22
-rw-r--r--src/cpu/o3/alpha_cpu.hh105
-rw-r--r--src/cpu/o3/alpha_cpu_builder.cc3
-rw-r--r--src/cpu/o3/alpha_cpu_impl.hh48
-rw-r--r--src/cpu/o3/alpha_dyn_inst.hh8
-rw-r--r--src/cpu/o3/alpha_dyn_inst_impl.hh16
-rw-r--r--src/cpu/o3/alpha_params.hh3
-rw-r--r--src/cpu/o3/bpred_unit.cc6
-rw-r--r--src/cpu/o3/bpred_unit.hh64
-rw-r--r--src/cpu/o3/bpred_unit_impl.hh182
-rw-r--r--src/cpu/o3/comm.hh11
-rw-r--r--src/cpu/o3/commit.hh40
-rw-r--r--src/cpu/o3/commit_impl.hh7
-rw-r--r--src/cpu/o3/cpu.hh67
-rw-r--r--src/cpu/o3/cpu_policy.hh30
-rw-r--r--src/cpu/o3/decode.hh3
-rw-r--r--src/cpu/o3/decode_impl.hh33
-rw-r--r--src/cpu/o3/dep_graph.hh22
-rw-r--r--src/cpu/o3/fetch.hh23
-rw-r--r--src/cpu/o3/fetch_impl.hh40
-rw-r--r--src/cpu/o3/fu_pool.cc2
-rw-r--r--src/cpu/o3/fu_pool.hh3
-rw-r--r--src/cpu/o3/iew.hh23
-rw-r--r--src/cpu/o3/iew_impl.hh13
-rw-r--r--src/cpu/o3/inst_queue.hh34
-rw-r--r--src/cpu/o3/inst_queue_impl.hh61
-rw-r--r--src/cpu/o3/lsq.hh7
-rw-r--r--src/cpu/o3/lsq_unit.hh9
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh7
-rw-r--r--src/cpu/o3/mem_dep_unit.hh13
-rw-r--r--src/cpu/o3/mem_dep_unit_impl.hh6
-rw-r--r--src/cpu/o3/rename.hh8
-rw-r--r--src/cpu/o3/rename_impl.hh2
-rw-r--r--src/cpu/o3/rename_map.hh7
-rw-r--r--src/cpu/o3/rob.hh3
-rw-r--r--src/cpu/o3/store_set.cc25
-rw-r--r--src/cpu/o3/store_set.hh40
-rw-r--r--src/cpu/o3/thread_state.hh29
-rw-r--r--src/cpu/o3/tournament_pred.cc232
-rw-r--r--src/cpu/o3/tournament_pred.hh90
41 files changed, 1028 insertions, 344 deletions
diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc
index 3d6a78bed..77a45ea26 100644
--- a/src/cpu/o3/2bit_local_pred.cc
+++ b/src/cpu/o3/2bit_local_pred.cc
@@ -33,9 +33,9 @@
#include "base/trace.hh"
#include "cpu/o3/2bit_local_pred.hh"
-DefaultBP::DefaultBP(unsigned _localPredictorSize,
- unsigned _localCtrBits,
- unsigned _instShiftAmt)
+LocalBP::LocalBP(unsigned _localPredictorSize,
+ unsigned _localCtrBits,
+ unsigned _instShiftAmt)
: localPredictorSize(_localPredictorSize),
localCtrBits(_localCtrBits),
instShiftAmt(_instShiftAmt)
@@ -71,7 +71,7 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
}
void
-DefaultBP::reset()
+LocalBP::reset()
{
for (int i = 0; i < localPredictorSets; ++i) {
localCtrs[i].reset();
@@ -79,21 +79,21 @@ DefaultBP::reset()
}
bool
-DefaultBP::lookup(Addr &branch_addr)
+LocalBP::lookup(Addr &branch_addr, void * &bp_history)
{
bool taken;
- uint8_t local_prediction;
+ uint8_t counter_val;
unsigned local_predictor_idx = getLocalIndex(branch_addr);
DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
local_predictor_idx);
- local_prediction = localCtrs[local_predictor_idx].read();
+ counter_val = localCtrs[local_predictor_idx].read();
DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
- (int)local_prediction);
+ (int)counter_val);
- taken = getPrediction(local_prediction);
+ taken = getPrediction(counter_val);
#if 0
// Speculative update.
@@ -110,8 +110,9 @@ DefaultBP::lookup(Addr &branch_addr)
}
void
-DefaultBP::update(Addr &branch_addr, bool taken)
+LocalBP::update(Addr &branch_addr, bool taken, void *bp_history)
{
+ assert(bp_history == NULL);
unsigned local_predictor_idx;
// Update the local predictor.
@@ -131,7 +132,7 @@ DefaultBP::update(Addr &branch_addr, bool taken)
inline
bool
-DefaultBP::getPrediction(uint8_t &count)
+LocalBP::getPrediction(uint8_t &count)
{
// Get the MSB of the count
return (count >> (localCtrBits - 1));
@@ -139,7 +140,7 @@ DefaultBP::getPrediction(uint8_t &count)
inline
unsigned
-DefaultBP::getLocalIndex(Addr &branch_addr)
+LocalBP::getLocalIndex(Addr &branch_addr)
{
return (branch_addr >> instShiftAmt) & indexMask;
}
diff --git a/src/cpu/o3/2bit_local_pred.hh b/src/cpu/o3/2bit_local_pred.hh
index 6e02a49be..0a2a71d3e 100644
--- a/src/cpu/o3/2bit_local_pred.hh
+++ b/src/cpu/o3/2bit_local_pred.hh
@@ -37,7 +37,14 @@
#include <vector>
-class DefaultBP
+/**
+ * Implements a local predictor that uses the PC to index into a table of
+ * counters. Note that any time a pointer to the bp_history is given, it
+ * should be NULL using this predictor because it does not have any branch
+ * predictor state that needs to be recorded or updated; the update can be
+ * determined solely by the branch being taken or not taken.
+ */
+class LocalBP
{
public:
/**
@@ -46,28 +53,31 @@ class DefaultBP
* @param localCtrBits Number of bits per counter.
* @param instShiftAmt Offset amount for instructions to ignore alignment.
*/
- DefaultBP(unsigned localPredictorSize, unsigned localCtrBits,
- unsigned instShiftAmt);
+ LocalBP(unsigned localPredictorSize, unsigned localCtrBits,
+ unsigned instShiftAmt);
/**
* Looks up the given address in the branch predictor and returns
* a true/false value as to whether it is taken.
* @param branch_addr The address of the branch to look up.
+ * @param bp_history Pointer to any bp history state.
* @return Whether or not the branch is taken.
*/
- bool lookup(Addr &branch_addr);
+ bool lookup(Addr &branch_addr, void * &bp_history);
/**
* Updates the branch predictor with the actual result of a branch.
* @param branch_addr The address of the branch to update.
* @param taken Whether or not the branch was taken.
*/
- void update(Addr &branch_addr, bool taken);
+ void update(Addr &branch_addr, bool taken, void *bp_history);
+
+ void squash(void *bp_history)
+ { assert(bp_history == NULL); }
void reset();
private:
-
/**
* Returns the taken/not taken prediction given the value of the
* counter.
diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh
index 4d889866a..2e5c856a8 100644
--- a/src/cpu/o3/alpha_cpu.hh
+++ b/src/cpu/o3/alpha_cpu.hh
@@ -43,6 +43,14 @@ namespace Kernel {
class TranslatingPort;
+/**
+ * AlphaFullCPU class. Derives from the FullO3CPU class, and
+ * implements all ISA and implementation specific functions of the
+ * CPU. This is the CPU class that is used for the SimObjects, and is
+ * what is given to the DynInsts. Most of its state exists in the
+ * FullO3CPU; the state is has is mainly for ISA specific
+ * functionality.
+ */
template <class Impl>
class AlphaFullCPU : public FullO3CPU<Impl>
{
@@ -62,83 +70,120 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Constructs an AlphaFullCPU with the given parameters. */
AlphaFullCPU(Params *params);
+ /**
+ * Derived ExecContext class for use with the AlphaFullCPU. It
+ * provides the interface for any external objects to access a
+ * single thread's state and some general CPU state. Any time
+ * external objects try to update state through this interface,
+ * the CPU will create an event to squash all in-flight
+ * instructions in order to ensure state is maintained correctly.
+ */
class AlphaXC : public ExecContext
{
public:
+ /** Pointer to the CPU. */
AlphaFullCPU<Impl> *cpu;
+ /** Pointer to the thread state that this XC corrseponds to. */
O3ThreadState<Impl> *thread;
+ /** Returns a pointer to this CPU. */
virtual BaseCPU *getCpuPtr() { return cpu; }
+ /** Sets this CPU's ID. */
virtual void setCpuId(int id) { cpu->cpu_id = id; }
+ /** Reads this CPU's ID. */
virtual int readCpuId() { return cpu->cpu_id; }
virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
#if FULL_SYSTEM
+ /** Returns a pointer to the system. */
virtual System *getSystemPtr() { return cpu->system; }
+ /** Returns a pointer to physical memory. */
virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
+ /** Returns a pointer to the ITB. */
virtual AlphaITB *getITBPtr() { return cpu->itb; }
- virtual AlphaDTB * getDTBPtr() { return cpu->dtb; }
+ /** Returns a pointer to the DTB. */
+ virtual AlphaDTB *getDTBPtr() { return cpu->dtb; }
+ /** Returns a pointer to this thread's kernel statistics. */
virtual Kernel::Statistics *getKernelStats()
{ return thread->kernelStats; }
#else
+ /** Returns a pointer to this thread's process. */
virtual Process *getProcessPtr() { return thread->process; }
#endif
-
+ /** Returns this thread's status. */
virtual Status status() const { return thread->status(); }
+ /** Sets this thread's status. */
virtual void setStatus(Status new_status)
{ thread->setStatus(new_status); }
- /// Set the status to Active. Optional delay indicates number of
- /// cycles to wait before beginning execution.
+ /** Set the status to Active. Optional delay indicates number of
+ * cycles to wait before beginning execution. */
virtual void activate(int delay = 1);
- /// Set the status to Suspended.
+ /** Set the status to Suspended. */
virtual void suspend();
- /// Set the status to Unallocated.
+ /** Set the status to Unallocated. */
virtual void deallocate();
- /// Set the status to Halted.
+ /** Set the status to Halted. */
virtual void halt();
#if FULL_SYSTEM
+ /** Dumps the function profiling information.
+ * @todo: Implement.
+ */
virtual void dumpFuncProfile();
#endif
-
+ /** Takes over execution of a thread from another CPU. */
virtual void takeOverFrom(ExecContext *old_context);
+ /** Registers statistics associated with this XC. */
virtual void regStats(const std::string &name);
+ /** Serializes state. */
virtual void serialize(std::ostream &os);
+ /** Unserializes state. */
virtual void unserialize(Checkpoint *cp, const std::string &section);
#if FULL_SYSTEM
+ /** Returns pointer to the quiesce event. */
virtual EndQuiesceEvent *getQuiesceEvent();
+ /** Reads the last tick that this thread was activated on. */
virtual Tick readLastActivate();
+ /** Reads the last tick that this thread was suspended on. */
virtual Tick readLastSuspend();
+ /** Clears the function profiling information. */
virtual void profileClear();
+ /** Samples the function profiling information. */
virtual void profileSample();
#endif
-
+ /** Returns this thread's ID number. */
virtual int getThreadNum() { return thread->tid; }
+ /** Returns the instruction this thread is currently committing.
+ * Only used when an instruction faults.
+ */
virtual TheISA::MachInst getInst();
+ /** Copies the architectural registers from another XC into this XC. */
virtual void copyArchRegs(ExecContext *xc);
+ /** Resets all architectural registers to 0. */
virtual void clearArchRegs();
+ /** Reads an integer register. */
virtual uint64_t readIntReg(int reg_idx);
virtual FloatReg readFloatReg(int reg_idx, int width);
@@ -149,6 +194,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual FloatRegBits readFloatRegBits(int reg_idx);
+ /** Sets an integer register to a value. */
virtual void setIntReg(int reg_idx, uint64_t val);
virtual void setFloatReg(int reg_idx, FloatReg val, int width);
@@ -159,14 +205,18 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
+ /** Reads this thread's PC. */
virtual uint64_t readPC()
{ return cpu->readPC(thread->tid); }
+ /** Sets this thread's PC. */
virtual void setPC(uint64_t val);
+ /** Reads this thread's next PC. */
virtual uint64_t readNextPC()
{ return cpu->readNextPC(thread->tid); }
+ /** Sets this thread's next PC. */
virtual void setNextPC(uint64_t val);
virtual uint64_t readNextNPC()
@@ -178,43 +228,60 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual void setNextNPC(uint64_t val)
{ panic("Alpha has no NextNPC!"); }
+ /** Reads a miscellaneous register. */
virtual MiscReg readMiscReg(int misc_reg)
{ return cpu->readMiscReg(misc_reg, thread->tid); }
+ /** Reads a misc. register, including any side-effects the
+ * read might have as defined by the architecture. */
virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
{ return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); }
+ /** Sets a misc. register. */
virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
+ /** Sets a misc. register, including any side-effects the
+ * write might have as defined by the architecture. */
virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+ /** Returns the number of consecutive store conditional failures. */
// @todo: Figure out where these store cond failures should go.
virtual unsigned readStCondFailures()
{ return thread->storeCondFailures; }
+ /** Sets the number of consecutive store conditional failures. */
virtual void setStCondFailures(unsigned sc_failures)
{ thread->storeCondFailures = sc_failures; }
#if FULL_SYSTEM
+ /** Returns if the thread is currently in PAL mode, based on
+ * the PC's value. */
virtual bool inPalMode()
{ return TheISA::PcPAL(cpu->readPC(thread->tid)); }
#endif
-
// Only really makes sense for old CPU model. Lots of code
// outside the CPU still checks this function, so it will
// always return false to keep everything working.
+ /** Checks if the thread is misspeculating. Because it is
+ * very difficult to determine if the thread is
+ * misspeculating, this is set as false. */
virtual bool misspeculating() { return false; }
#if !FULL_SYSTEM
+ /** Gets a syscall argument by index. */
virtual IntReg getSyscallArg(int i);
+ /** Sets a syscall argument. */
virtual void setSyscallArg(int i, IntReg val);
+ /** Sets the syscall return value. */
virtual void setSyscallReturn(SyscallReturn return_value);
+ /** Executes a syscall in SE mode. */
virtual void syscall(int64_t callnum)
{ return cpu->syscall(callnum, thread->tid); }
+ /** Reads the funcExeInst counter. */
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
#endif
virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
@@ -274,19 +341,32 @@ class AlphaFullCPU : public FullO3CPU<Impl>
}
#endif
+ /** Reads a miscellaneous register. */
MiscReg readMiscReg(int misc_reg, unsigned tid);
+ /** Reads a misc. register, including any side effects the read
+ * might have as defined by the architecture.
+ */
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
+ /** Sets a miscellaneous register. */
Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
+ /** Sets a misc. register, including any side effects the write
+ * might have as defined by the architecture.
+ */
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
+ /** Initiates a squash of all in-flight instructions for a given
+ * thread. The source of the squash is an external update of
+ * state through the XC.
+ */
void squashFromXC(unsigned tid);
#if FULL_SYSTEM
+ /** Posts an interrupt. */
void post_interrupt(int int_num, int index);
-
+ /** Reads the interrupt flag. */
int readIntrFlag();
/** Sets the interrupt flags. */
void setIntrFlag(int val);
@@ -312,7 +392,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Executes a syscall.
* @todo: Determine if this needs to be virtual.
*/
- void syscall(int64_t callnum, int thread_num);
+ void syscall(int64_t callnum, int tid);
/** Gets a syscall argument. */
IntReg getSyscallArg(int i, int tid);
@@ -438,6 +518,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
Addr lockAddr;
+ /** Temporary fix for the lock flag, works in the UP case. */
bool lockFlag;
};
diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc
index c91e2c7c9..1592261de 100644
--- a/src/cpu/o3/alpha_cpu_builder.cc
+++ b/src/cpu/o3/alpha_cpu_builder.cc
@@ -107,6 +107,7 @@ Param<unsigned> squashWidth;
Param<Tick> trapLatency;
Param<Tick> fetchTrapLatency;
+Param<std::string> predType;
Param<unsigned> localPredictorSize;
Param<unsigned> localCtrBits;
Param<unsigned> localHistoryTableSize;
@@ -229,6 +230,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
+ INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
INIT_PARAM(localCtrBits, "Bits per counter"),
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
@@ -359,6 +361,7 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->trapLatency = trapLatency;
params->fetchTrapLatency = fetchTrapLatency;
+ params->predType = predType;
params->localPredictorSize = localPredictorSize;
params->localCtrBits = localCtrBits;
params->localHistoryTableSize = localHistoryTableSize;
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh
index 6893f8c64..ad4401f7e 100644
--- a/src/cpu/o3/alpha_cpu_impl.hh
+++ b/src/cpu/o3/alpha_cpu_impl.hh
@@ -60,10 +60,12 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
{
DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
+ // Setup any thread state.
this->thread.resize(this->numThreads);
for (int i = 0; i < this->numThreads; ++i) {
#if FULL_SYSTEM
+ // SMT is not supported in FS mode yet.
assert(this->numThreads == 1);
this->thread[i] = new Thread(this, 0, params->mem);
this->thread[i]->setStatus(ExecContext::Suspended);
@@ -86,29 +88,34 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
}
#endif // !FULL_SYSTEM
- this->thread[i]->numInst = 0;
-
ExecContext *xc_proxy;
- AlphaXC *alpha_xc_proxy = new AlphaXC;
+ // Setup the XC that will serve as the interface to the threads/CPU.
+ AlphaXC *alpha_xc = new AlphaXC;
+ // If we're using a checker, then the XC should be the
+ // CheckerExecContext.
if (params->checker) {
- xc_proxy = new CheckerExecContext<AlphaXC>(alpha_xc_proxy, this->checker);
+ xc_proxy = new CheckerExecContext<AlphaXC>(
+ alpha_xc, this->checker);
} else {
- xc_proxy = alpha_xc_proxy;
+ xc_proxy = alpha_xc;
}
- alpha_xc_proxy->cpu = this;
- alpha_xc_proxy->thread = this->thread[i];
+ alpha_xc->cpu = this;
+ alpha_xc->thread = this->thread[i];
#if FULL_SYSTEM
+ // Setup quiesce event.
this->thread[i]->quiesceEvent =
new EndQuiesceEvent(xc_proxy);
this->thread[i]->lastActivate = 0;
this->thread[i]->lastSuspend = 0;
#endif
+ // Give the thread the XC.
this->thread[i]->xcProxy = xc_proxy;
+ // Add the XC to the CPU's list of XC's.
this->execContexts.push_back(xc_proxy);
}
@@ -170,6 +177,7 @@ AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
setStatus(old_context->status());
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
+
#if !FULL_SYSTEM
thread->funcExeInst = old_context->readFuncExeInst();
#else
@@ -391,7 +399,6 @@ template <class Impl>
uint64_t
AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
{
- DPRINTF(Fault, "Reading int register through the XC!\n");
return cpu->readArchIntReg(reg_idx, thread->tid);
}
@@ -399,7 +406,6 @@ template <class Impl>
FloatReg
AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx, int width)
{
- DPRINTF(Fault, "Reading float register through the XC!\n");
switch(width) {
case 32:
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
@@ -415,7 +421,6 @@ template <class Impl>
FloatReg
AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx)
{
- DPRINTF(Fault, "Reading float register through the XC!\n");
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
}
@@ -431,7 +436,6 @@ template <class Impl>
FloatRegBits
AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx)
{
- DPRINTF(Fault, "Reading floatint register through the XC!\n");
return cpu->readArchFloatRegInt(reg_idx, thread->tid);
}
@@ -439,9 +443,9 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
{
- DPRINTF(Fault, "Setting int register through the XC!\n");
cpu->setArchIntReg(reg_idx, val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -451,7 +455,6 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
{
- DPRINTF(Fault, "Setting float register through the XC!\n");
switch(width) {
case 32:
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
@@ -461,6 +464,7 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
break;
}
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -470,7 +474,6 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val)
{
- DPRINTF(Fault, "Setting float register through the XC!\n");
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
if (!thread->trapPending && !thread->inSyscall) {
@@ -486,6 +489,7 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val,
DPRINTF(Fault, "Setting floatint register through the XC!\n");
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -495,9 +499,9 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val)
{
- DPRINTF(Fault, "Setting floatint register through the XC!\n");
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -509,6 +513,7 @@ AlphaFullCPU<Impl>::AlphaXC::setPC(uint64_t val)
{
cpu->setPC(val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -520,6 +525,7 @@ AlphaFullCPU<Impl>::AlphaXC::setNextPC(uint64_t val)
{
cpu->setNextPC(val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -529,10 +535,9 @@ template <class Impl>
Fault
AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
{
- DPRINTF(Fault, "Setting misc register through the XC!\n");
-
Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -542,12 +547,12 @@ AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
template <class Impl>
Fault
-AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg,
+ const MiscReg &val)
{
- DPRINTF(Fault, "Setting misc register through the XC!\n");
-
Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid);
+ // Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@@ -628,7 +633,6 @@ AlphaFullCPU<Impl>::post_interrupt(int int_num, int index)
if (this->thread[0]->status() == ExecContext::Suspended) {
DPRINTF(IPI,"Suspended Processor awoke\n");
-// xcProxies[0]->activate();
this->execContexts[0]->activate();
}
}
@@ -691,6 +695,7 @@ template <class Impl>
void
AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
{
+ // Pass the thread's XC into the invoke method.
fault->invoke(this->execContexts[tid]);
}
@@ -741,6 +746,7 @@ AlphaFullCPU<Impl>::processInterrupts()
if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
this->setMiscReg(IPR_ISR, summary, 0);
this->setMiscReg(IPR_INTID, ipl, 0);
+ // Checker needs to know these two registers were updated.
if (this->checker) {
this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh
index af2858802..143ffe7e4 100644
--- a/src/cpu/o3/alpha_dyn_inst.hh
+++ b/src/cpu/o3/alpha_dyn_inst.hh
@@ -93,23 +93,31 @@ class AlphaDynInst : public BaseDynInst<Impl>
void initVars();
public:
+ /** Reads a miscellaneous register. */
MiscReg readMiscReg(int misc_reg)
{
return this->cpu->readMiscReg(misc_reg, this->threadNumber);
}
+ /** Reads a misc. register, including any side-effects the read
+ * might have as defined by the architecture.
+ */
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
{
return this->cpu->readMiscRegWithEffect(misc_reg, fault,
this->threadNumber);
}
+ /** Sets a misc. register. */
Fault setMiscReg(int misc_reg, const MiscReg &val)
{
this->instResult.integer = val;
return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
}
+ /** Sets a misc. register, including any side-effects the write
+ * might have as defined by the architecture.
+ */
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
{
return this->cpu->setMiscRegWithEffect(misc_reg, val,
diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh
index 06755eb76..3a0727b45 100644
--- a/src/cpu/o3/alpha_dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha_dyn_inst_impl.hh
@@ -66,9 +66,10 @@ template <class Impl>
Fault
AlphaDynInst<Impl>::execute()
{
- // @todo: Pretty convoluted way to avoid squashing from happening when using
- // the XC during an instruction's execution (specifically for instructions
- // that have sideeffects that use the XC). Fix this.
+ // @todo: Pretty convoluted way to avoid squashing from happening
+ // when using the XC during an instruction's execution
+ // (specifically for instructions that have side-effects that use
+ // the XC). Fix this.
bool in_syscall = this->thread->inSyscall;
this->thread->inSyscall = true;
@@ -83,9 +84,10 @@ template <class Impl>
Fault
AlphaDynInst<Impl>::initiateAcc()
{
- // @todo: Pretty convoluted way to avoid squashing from happening when using
- // the XC during an instruction's execution (specifically for instructions
- // that have sideeffects that use the XC). Fix this.
+ // @todo: Pretty convoluted way to avoid squashing from happening
+ // when using the XC during an instruction's execution
+ // (specifically for instructions that have side-effects that use
+ // the XC). Fix this.
bool in_syscall = this->thread->inSyscall;
this->thread->inSyscall = true;
@@ -118,9 +120,11 @@ template <class Impl>
Fault
AlphaDynInst<Impl>::hwrei()
{
+ // Can only do a hwrei when in pal mode.
if (!this->cpu->inPalMode(this->readPC()))
return new AlphaISA::UnimplementedOpcodeFault;
+ // Set the next PC based on the value of the EXC_ADDR IPR.
this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR,
this->threadNumber));
diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh
index 8c6779495..e48abd9ed 100644
--- a/src/cpu/o3/alpha_params.hh
+++ b/src/cpu/o3/alpha_params.hh
@@ -126,8 +126,9 @@ class AlphaSimpleParams : public BaseFullCPU::Params
Tick fetchTrapLatency;
//
- // Branch predictor (BP & BTB)
+ // Branch predictor (BP, BTB, RAS)
//
+ std::string predType;
unsigned localPredictorSize;
unsigned localCtrBits;
unsigned localHistoryTableSize;
diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc
index d0af5af92..b33543bdc 100644
--- a/src/cpu/o3/bpred_unit.cc
+++ b/src/cpu/o3/bpred_unit.cc
@@ -34,6 +34,6 @@
#include "cpu/ozone/ozone_impl.hh"
//#include "cpu/ozone/simple_impl.hh"
-template class TwobitBPredUnit<AlphaSimpleImpl>;
-template class TwobitBPredUnit<OzoneImpl>;
-//template class TwobitBPredUnit<SimpleImpl>;
+template class BPredUnit<AlphaSimpleImpl>;
+template class BPredUnit<OzoneImpl>;
+//template class BPredUnit<SimpleImpl>;
diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh
index 39a88a3ce..2c0a39565 100644
--- a/src/cpu/o3/bpred_unit.hh
+++ b/src/cpu/o3/bpred_unit.hh
@@ -48,16 +48,25 @@
* and the BTB.
*/
template<class Impl>
-class TwobitBPredUnit
+class BPredUnit
{
- public:
+ private:
typedef typename Impl::Params Params;
typedef typename Impl::DynInstPtr DynInstPtr;
+ enum PredType {
+ Local,
+ Tournament
+ };
+
+ PredType predictor;
+
+ public:
+
/**
* @param params The params object, that has the size of the BP and BTB.
*/
- TwobitBPredUnit(Params *params);
+ BPredUnit(Params *params);
/**
* Registers statistics.
@@ -78,6 +87,9 @@ class TwobitBPredUnit
*/
bool predict(DynInstPtr &inst, Addr &PC, unsigned tid);
+ // @todo: Rename this function.
+ void BPUncond(void * &bp_history);
+
/**
* Tells the branch predictor to commit any updates until the given
* sequence number.
@@ -107,12 +119,19 @@ class TwobitBPredUnit
bool actually_taken, unsigned tid);
/**
+ * @param bp_history Pointer to the history object. The predictor
+ * will need to update any state and delete the object.
+ */
+ void BPSquash(void *bp_history);
+
+ /**
* Looks up a given PC in the BP to see if it is taken or not taken.
* @param inst_PC The PC to look up.
+ * @param bp_history Pointer that will be set to an object that
+ * has the branch predictor state associated with the lookup.
* @return Whether the branch is taken or not taken.
*/
- bool BPLookup(Addr &inst_PC)
- { return BP.lookup(inst_PC); }
+ bool BPLookup(Addr &inst_PC, void * &bp_history);
/**
* Looks up a given PC in the BTB to see if a matching entry exists.
@@ -134,10 +153,11 @@ class TwobitBPredUnit
* Updates the BP with taken/not taken information.
* @param inst_PC The branch's PC that will be updated.
* @param taken Whether the branch was taken or not taken.
+ * @param bp_history Pointer to the branch predictor state that is
+ * associated with the branch lookup that is being updated.
* @todo Make this update flexible enough to handle a global predictor.
*/
- void BPUpdate(Addr &inst_PC, bool taken)
- { BP.update(inst_PC, taken); }
+ void BPUpdate(Addr &inst_PC, bool taken, void *bp_history);
/**
* Updates the BTB with the target of a branch.
@@ -147,18 +167,20 @@ class TwobitBPredUnit
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
{ BTB.update(inst_PC, target_PC,0); }
+ void dump();
+
private:
struct PredictorHistory {
/**
- * Makes a predictor history struct that contains a sequence number,
- * the PC of its instruction, and whether or not it was predicted
- * taken.
+ * Makes a predictor history struct that contains any
+ * information needed to update the predictor, BTB, and RAS.
*/
PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC,
- const bool pred_taken, const unsigned _tid)
- : seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0),
+ const bool pred_taken, void *bp_history,
+ const unsigned _tid)
+ : seqNum(seq_num), PC(inst_PC), RASTarget(0),
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
- wasCall(0)
+ wasCall(0), bpHistory(bp_history)
{ }
/** The sequence number for the predictor history entry. */
@@ -170,9 +192,6 @@ class TwobitBPredUnit
/** The RAS target (only valid if a return). */
Addr RASTarget;
- /** The global history at the time this entry was created. */
- unsigned globalHistory;
-
/** The RAS index of the instruction (only valid if a call). */
unsigned RASIndex;
@@ -187,6 +206,12 @@ class TwobitBPredUnit
/** Whether or not the instruction was a call. */
bool wasCall;
+
+ /** Pointer to the history object passed back from the branch
+ * predictor. It is used to update or restore state of the
+ * branch predictor.
+ */
+ void *bpHistory;
};
typedef std::list<PredictorHistory> History;
@@ -198,8 +223,11 @@ class TwobitBPredUnit
*/
History predHist[Impl::MaxThreads];
- /** The branch predictor. */
- DefaultBP BP;
+ /** The local branch predictor. */
+ LocalBP *localBP;
+
+ /** The tournament branch predictor. */
+ TournamentBP *tournamentBP;
/** The BTB. */
DefaultBTB BTB;
diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh
index cde9f28ab..0da02145b 100644
--- a/src/cpu/o3/bpred_unit_impl.hh
+++ b/src/cpu/o3/bpred_unit_impl.hh
@@ -38,21 +38,40 @@
using namespace std;
template<class Impl>
-TwobitBPredUnit<Impl>::TwobitBPredUnit(Params *params)
- : BP(params->localPredictorSize,
- params->localCtrBits,
- params->instShiftAmt),
- BTB(params->BTBEntries,
+BPredUnit<Impl>::BPredUnit(Params *params)
+ : BTB(params->BTBEntries,
params->BTBTagSize,
params->instShiftAmt)
{
+ // Setup the selected predictor.
+ if (params->predType == "local") {
+ localBP = new LocalBP(params->localPredictorSize,
+ params->localCtrBits,
+ params->instShiftAmt);
+ predictor = Local;
+ } else if (params->predType == "tournament") {
+ tournamentBP = new TournamentBP(params->localPredictorSize,
+ params->localCtrBits,
+ params->localHistoryTableSize,
+ params->localHistoryBits,
+ params->globalPredictorSize,
+ params->globalHistoryBits,
+ params->globalCtrBits,
+ params->choicePredictorSize,
+ params->choiceCtrBits,
+ params->instShiftAmt);
+ predictor = Tournament;
+ } else {
+ fatal("Invalid BP selected!");
+ }
+
for (int i=0; i < Impl::MaxThreads; i++)
RAS[i].init(params->RASSize);
}
template <class Impl>
void
-TwobitBPredUnit<Impl>::regStats()
+BPredUnit<Impl>::regStats()
{
lookups
.name(name() + ".BPredUnit.lookups")
@@ -98,17 +117,20 @@ TwobitBPredUnit<Impl>::regStats()
template <class Impl>
void
-TwobitBPredUnit<Impl>::switchOut()
+BPredUnit<Impl>::switchOut()
{
+ // Clear any state upon switch out.
for (int i = 0; i < Impl::MaxThreads; ++i) {
- predHist[i].clear();
+ squash(0, i);
}
}
template <class Impl>
void
-TwobitBPredUnit<Impl>::takeOverFrom()
+BPredUnit<Impl>::takeOverFrom()
{
+ // Can reset all predictor state, but it's not necessarily better
+ // than leaving it be.
/*
for (int i = 0; i < Impl::MaxThreads; ++i)
RAS[i].reset();
@@ -120,11 +142,10 @@ TwobitBPredUnit<Impl>::takeOverFrom()
template <class Impl>
bool
-TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
+BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
{
// See if branch predictor predicts taken.
// If so, get its target addr either from the BTB or the RAS.
- // Once that's done, speculatively update the predictor?
// Save off record of branch stuff so the RAS can be fixed
// up once it's done.
@@ -135,20 +156,25 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
++lookups;
+ void *bp_history = NULL;
+
if (inst->isUncondCtrl()) {
DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid);
pred_taken = true;
+ // Tell the BP there was an unconditional branch.
+ BPUncond(bp_history);
} else {
++condPredicted;
- pred_taken = BPLookup(PC);
+ pred_taken = BPLookup(PC, bp_history);
DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i "
"for PC %#x\n",
tid, pred_taken, inst->readPC());
}
- PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid);
+ PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
+ bp_history, tid);
// Now lookup in the BTB or RAS.
if (pred_taken) {
@@ -189,7 +215,7 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
if (BTB.valid(PC, tid)) {
++BTBHits;
- //If it's anything else, use the BTB to get the target addr.
+ // If it's not a return, use the BTB to get the target addr.
target = BTB.lookup(PC, tid);
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted"
@@ -223,7 +249,7 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
template <class Impl>
void
-TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
+BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
{
DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence"
"number %lli.\n", tid, done_sn);
@@ -231,8 +257,9 @@ TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
while (!predHist[tid].empty() &&
predHist[tid].back().seqNum <= done_sn) {
// Update the branch predictor with the correct results.
- BP.update(predHist[tid].back().PC,
- predHist[tid].back().predTaken);
+ BPUpdate(predHist[tid].back().PC,
+ predHist[tid].back().predTaken,
+ predHist[tid].back().bpHistory);
predHist[tid].pop_back();
}
@@ -240,13 +267,13 @@ TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
template <class Impl>
void
-TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
+BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
{
History &pred_hist = predHist[tid];
while (!pred_hist.empty() &&
pred_hist.front().seqNum > squashed_sn) {
- if (pred_hist.front().usedRAS) {
+ if (pred_hist.front().usedRAS) {
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
" target: %#x.\n",
tid,
@@ -257,12 +284,15 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
pred_hist.front().RASTarget);
} else if (pred_hist.front().wasCall) {
- DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added "
- "to the RAS.\n",tid);
+ DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry "
+ "added to the RAS.\n",tid);
RAS[tid].pop();
}
+ // This call should delete the bpHistory.
+ BPSquash(pred_hist.front().bpHistory);
+
pred_hist.pop_front();
}
@@ -270,10 +300,10 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
template <class Impl>
void
-TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
- const Addr &corr_target,
- const bool actually_taken,
- unsigned tid)
+BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
+ const Addr &corr_target,
+ const bool actually_taken,
+ unsigned tid)
{
// Now that we know that a branch was mispredicted, we need to undo
// all the branches that have been seen up until this branch and
@@ -287,40 +317,96 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
"setting target to %#x.\n",
tid, squashed_sn, corr_target);
- while (!pred_hist.empty() &&
- pred_hist.front().seqNum > squashed_sn) {
- if (pred_hist.front().usedRAS) {
- DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, "
- "target: %#x.\n",
- tid,
- pred_hist.front().RASIndex,
- pred_hist.front().RASTarget);
-
- RAS[tid].restore(pred_hist.front().RASIndex,
- pred_hist.front().RASTarget);
- } else if (pred_hist.front().wasCall) {
- DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry"
- " added to the RAS.\n", tid);
-
- RAS[tid].pop();
- }
-
- pred_hist.pop_front();
- }
+ squash(squashed_sn, tid);
// If there's a squash due to a syscall, there may not be an entry
// corresponding to the squash. In that case, don't bother trying to
// fix up the entry.
if (!pred_hist.empty()) {
- pred_hist.front().predTaken = actually_taken;
-
+ assert(pred_hist.front().seqNum == squashed_sn);
if (pred_hist.front().usedRAS) {
++RASIncorrect;
}
- BP.update(pred_hist.front().PC, actually_taken);
+ BPUpdate(pred_hist.front().PC, actually_taken,
+ pred_hist.front().bpHistory);
BTB.update(pred_hist.front().PC, corr_target, tid);
pred_hist.pop_front();
}
}
+
+template <class Impl>
+void
+BPredUnit<Impl>::BPUncond(void * &bp_history)
+{
+ // Only the tournament predictor cares about unconditional branches.
+ if (predictor == Tournament) {
+ tournamentBP->uncondBr(bp_history);
+ }
+}
+
+template <class Impl>
+void
+BPredUnit<Impl>::BPSquash(void *bp_history)
+{
+ if (predictor == Local) {
+ localBP->squash(bp_history);
+ } else if (predictor == Tournament) {
+ tournamentBP->squash(bp_history);
+ } else {
+ panic("Predictor type is unexpected value!");
+ }
+}
+
+template <class Impl>
+bool
+BPredUnit<Impl>::BPLookup(Addr &inst_PC, void * &bp_history)
+{
+ if (predictor == Local) {
+ return localBP->lookup(inst_PC, bp_history);
+ } else if (predictor == Tournament) {
+ return tournamentBP->lookup(inst_PC, bp_history);
+ } else {
+ panic("Predictor type is unexpected value!");
+ }
+}
+
+template <class Impl>
+void
+BPredUnit<Impl>::BPUpdate(Addr &inst_PC, bool taken, void *bp_history)
+{
+ if (predictor == Local) {
+ localBP->update(inst_PC, taken, bp_history);
+ } else if (predictor == Tournament) {
+ tournamentBP->update(inst_PC, taken, bp_history);
+ } else {
+ panic("Predictor type is unexpected value!");
+ }
+}
+
+template <class Impl>
+void
+BPredUnit<Impl>::dump()
+{
+ typename History::iterator pred_hist_it;
+
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ if (!predHist[i].empty()) {
+ pred_hist_it = predHist[i].begin();
+
+ cprintf("predHist[%i].size(): %i\n", i, predHist[i].size());
+
+ while (pred_hist_it != predHist[i].end()) {
+ cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, "
+ "bpHistory:%#x\n",
+ (*pred_hist_it).seqNum, (*pred_hist_it).PC,
+ (*pred_hist_it).tid, (*pred_hist_it).predTaken,
+ (*pred_hist_it).bpHistory);
+ pred_hist_it++;
+ }
+
+ cprintf("\n");
+ }
+ }
+}
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index 84ad5f6b1..bf1bd08e8 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -43,6 +43,7 @@
// typedef yet are not templated on the Impl. For now it will be defined here.
typedef short int PhysRegIndex;
+/** Struct that defines the information passed from fetch to decode. */
template<class Impl>
struct DefaultFetchDefaultDecode {
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -55,6 +56,7 @@ struct DefaultFetchDefaultDecode {
bool clearFetchFault;
};
+/** Struct that defines the information passed from decode to rename. */
template<class Impl>
struct DefaultDecodeDefaultRename {
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -64,6 +66,7 @@ struct DefaultDecodeDefaultRename {
DynInstPtr insts[Impl::MaxWidth];
};
+/** Struct that defines the information passed from rename to IEW. */
template<class Impl>
struct DefaultRenameDefaultIEW {
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -73,6 +76,7 @@ struct DefaultRenameDefaultIEW {
DynInstPtr insts[Impl::MaxWidth];
};
+/** Struct that defines the information passed from IEW to commit. */
template<class Impl>
struct DefaultIEWDefaultCommit {
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -100,6 +104,7 @@ struct IssueStruct {
DynInstPtr insts[Impl::MaxWidth];
};
+/** Struct that defines all backwards communication. */
template<class Impl>
struct TimeBufStruct {
struct decodeComm {
@@ -121,13 +126,7 @@ struct TimeBufStruct {
decodeComm decodeInfo[Impl::MaxThreads];
- // Rename can't actually tell anything to squash or send a new PC back
- // because it doesn't do anything along those lines. But maybe leave
- // these fields in here to keep the stages mostly orthagonal.
struct renameComm {
- bool squash;
-
- uint64_t nextPC;
};
renameComm renameInfo[Impl::MaxThreads];
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index ae2aa2996..eef96b5fd 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -85,6 +85,9 @@ class DefaultCommit
typedef O3ThreadState<Impl> Thread;
+ /** Event class used to schedule a squash due to a trap (fault or
+ * interrupt) to happen on a specific cycle.
+ */
class TrapEvent : public Event {
private:
DefaultCommit<Impl> *commit;
@@ -162,7 +165,7 @@ class DefaultCommit
Fetch *fetchStage;
- /** Sets the poitner to the IEW stage. */
+ /** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
/** The pointer to the IEW stage. Used solely to ensure that
@@ -183,10 +186,13 @@ class DefaultCommit
/** Initializes stage by sending back the number of free entries. */
void initStage();
+ /** Initializes the switching out of commit. */
void switchOut();
+ /** Completes the switch out of commit. */
void doSwitchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
/** Ticks the commit stage, which tries to commit instructions. */
@@ -200,11 +206,18 @@ class DefaultCommit
/** Returns the number of free ROB entries for a specific thread. */
unsigned numROBFreeEntries(unsigned tid);
+ /** Generates an event to schedule a squash due to a trap. */
+ void generateTrapEvent(unsigned tid);
+
+ /** Records that commit needs to initiate a squash due to an
+ * external state update through the XC.
+ */
void generateXCEvent(unsigned tid);
private:
/** Updates the overall status of commit with the nextStatus, and
- * tell the CPU if commit is active/inactive. */
+ * tell the CPU if commit is active/inactive.
+ */
void updateStatus();
/** Sets the next status based on threads' statuses, which becomes the
@@ -223,10 +236,13 @@ class DefaultCommit
*/
bool changedROBEntries();
+ /** Squashes all in flight instructions. */
void squashAll(unsigned tid);
+ /** Handles squashing due to a trap. */
void squashFromTrap(unsigned tid);
+ /** Handles squashing due to an XC write. */
void squashFromXC(unsigned tid);
/** Commits as many instructions as possible. */
@@ -237,8 +253,6 @@ class DefaultCommit
*/
bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
- void generateTrapEvent(unsigned tid);
-
/** Gets instructions from rename and inserts them into the ROB. */
void getInsts();
@@ -260,12 +274,16 @@ class DefaultCommit
*/
uint64_t readPC() { return PC[0]; }
+ /** Returns the PC of a specific thread. */
uint64_t readPC(unsigned tid) { return PC[tid]; }
+ /** Sets the PC of a specific thread. */
void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
+ /** Reads the PC of a specific thread. */
uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
+ /** Sets the next PC of a specific thread. */
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
private:
@@ -302,6 +320,7 @@ class DefaultCommit
/** Pointer to FullCPU. */
FullCPU *cpu;
+ /** Vector of all of the threads. */
std::vector<Thread *> thread;
Fault fetchFault;
@@ -360,17 +379,27 @@ class DefaultCommit
/** Number of Active Threads */
unsigned numThreads;
+ /** Is a switch out pending. */
bool switchPending;
+
+ /** Is commit switched out. */
bool switchedOut;
+ /** The latency to handle a trap. Used when scheduling trap
+ * squash event.
+ */
Tick trapLatency;
Tick fetchTrapLatency;
Tick fetchFaultTick;
+ /** The commit PC of each thread. Refers to the instruction that
+ * is currently being processed/committed.
+ */
Addr PC[Impl::MaxThreads];
+ /** The next PC of each thread. */
Addr nextPC[Impl::MaxThreads];
/** The sequence number of the youngest valid instruction in the ROB. */
@@ -382,6 +411,7 @@ class DefaultCommit
/** Rename map interface. */
RenameMap *renameMap[Impl::MaxThreads];
+ /** Updates commit stats based on this instruction. */
void updateComInstStats(DynInstPtr &inst);
/** Stat for the total number of committed instructions. */
@@ -415,7 +445,9 @@ class DefaultCommit
/** Total number of committed branches. */
Stats::Vector<> statComBranches;
+ /** Number of cycles where the commit bandwidth limit is reached. */
Stats::Scalar<> commitEligibleSamples;
+ /** Number of instructions not committed due to bandwidth limits. */
Stats::Vector<> commitEligible;
};
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 9efe30d24..f8a252b87 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -692,7 +692,7 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
-
+/*
if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
// Record the fault. Wait until it's empty in the ROB.
// Then handle the trap. Ignore it if there's already a
@@ -714,7 +714,7 @@ DefaultCommit<Impl>::commit()
commitStatus[0] = Running;
}
}
-
+*/
// Not sure which one takes priority. I think if we have
// both, that's a bad sign.
if (trapSquash[tid] == true) {
@@ -926,7 +926,7 @@ DefaultCommit<Impl>::commitInsts()
numCommittedDist.sample(num_committed);
if (num_committed == commitWidth) {
- commitEligible[0]++;
+ commitEligibleSamples++;
}
}
@@ -948,6 +948,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->reachedCommit = true;
if (head_inst->isNonSpeculative() ||
+ head_inst->isStoreConditional() ||
head_inst->isMemBarrier() ||
head_inst->isWriteBarrier()) {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 51fcb1adb..c2c5289bf 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -72,6 +72,11 @@ class BaseFullCPU : public BaseCPU
int cpu_id;
};
+/**
+ * FullO3CPU class, has each of the stages (fetch through commit)
+ * within it, as well as all of the time buffers between stages. The
+ * tick() function for the CPU is defined here.
+ */
template <class Impl>
class FullO3CPU : public BaseFullCPU
{
@@ -202,17 +207,13 @@ class FullO3CPU : public BaseFullCPU
*/
virtual void syscall(int tid) { panic("Unimplemented!"); }
- /** Check if there are any system calls pending. */
- void checkSyscalls();
-
- /** Switches out this CPU.
- */
+ /** Switches out this CPU. */
void switchOut(Sampler *sampler);
+ /** Signals to this CPU that a stage has completed switching out. */
void signalSwitched();
- /** Takes over from another CPU.
- */
+ /** Takes over from another CPU. */
void takeOverFrom(BaseCPU *oldCPU);
/** Get the current instruction sequence number, and increment it. */
@@ -244,9 +245,7 @@ class FullO3CPU : public BaseFullCPU
#endif
- //
- // New accessors for new decoder.
- //
+ /** Register accessors. Index refers to the physical register index. */
uint64_t readIntReg(int reg_idx);
FloatReg readFloatReg(int reg_idx);
@@ -275,6 +274,11 @@ class FullO3CPU : public BaseFullCPU
uint64_t readArchFloatRegInt(int reg_idx, unsigned tid);
+ /** Architectural register accessors. Looks up in the commit
+ * rename table to obtain the true physical index of the
+ * architected register first, then accesses that physical
+ * register.
+ */
void setArchIntReg(int reg_idx, uint64_t val, unsigned tid);
void setArchFloatRegSingle(int reg_idx, float val, unsigned tid);
@@ -283,13 +287,17 @@ class FullO3CPU : public BaseFullCPU
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
+ /** Reads the commit PC of a specific thread. */
uint64_t readPC(unsigned tid);
- void setPC(Addr new_PC,unsigned tid);
+ /** Sets the commit PC of a specific thread. */
+ void setPC(Addr new_PC, unsigned tid);
+ /** Reads the next PC of a specific thread. */
uint64_t readNextPC(unsigned tid);
- void setNextPC(uint64_t val,unsigned tid);
+ /** Sets the next PC of a specific thread. */
+ void setNextPC(uint64_t val, unsigned tid);
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
@@ -313,21 +321,15 @@ class FullO3CPU : public BaseFullCPU
/** Remove all instructions younger than the given sequence number. */
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
+ /** Removes the instruction pointed to by the iterator. */
inline void squashInstIt(const ListIt &instIt, const unsigned &tid);
+ /** Cleans up all instructions on the remove list. */
void cleanUpRemovedInsts();
- /** Remove all instructions from the list. */
-// void removeAllInsts();
-
+ /** Debug function to print all instructions on the list. */
void dumpInsts();
- /** Basically a wrapper function so that instructions executed at
- * commit can tell the instruction queue that they have
- * completed. Eventually this hack should be removed.
- */
-// void wakeDependents(DynInstPtr &inst);
-
public:
/** List of all the instructions in flight. */
std::list<DynInstPtr> instList;
@@ -338,6 +340,9 @@ class FullO3CPU : public BaseFullCPU
std::queue<ListIt> removeList;
#ifdef DEBUG
+ /** Debug structure to keep track of the sequence numbers still in
+ * flight.
+ */
std::set<InstSeqNum> snList;
#endif
@@ -424,14 +429,22 @@ class FullO3CPU : public BaseFullCPU
/** The IEW stage's instruction queue. */
TimeBuffer<IEWStruct> iewQueue;
- public:
+ private:
+ /** The activity recorder; used to tell if the CPU has any
+ * activity remaining or if it can go to idle and deschedule
+ * itself.
+ */
ActivityRecorder activityRec;
+ public:
+ /** Records that there was time buffer activity this cycle. */
void activityThisCycle() { activityRec.activity(); }
+ /** Changes a stage's status to active within the activity recorder. */
void activateStage(const StageIdx idx)
{ activityRec.activateStage(idx); }
+ /** Changes a stage's status to inactive within the activity recorder. */
void deactivateStage(const StageIdx idx)
{ activityRec.deactivateStage(idx); }
@@ -442,7 +455,7 @@ class FullO3CPU : public BaseFullCPU
int getFreeTid();
public:
- /** Temporary function to get pointer to exec context. */
+ /** Returns a pointer to a thread's exec context. */
ExecContext *xcBase(unsigned tid)
{
return thread[tid]->getXCProxy();
@@ -451,6 +464,10 @@ class FullO3CPU : public BaseFullCPU
/** The global sequence number counter. */
InstSeqNum globalSeqNum;
+ /** Pointer to the checker, which can dynamically verify
+ * instruction results at run time. This can be set to NULL if it
+ * is not being used.
+ */
Checker<DynInstPtr> *checker;
#if FULL_SYSTEM
@@ -466,11 +483,13 @@ class FullO3CPU : public BaseFullCPU
/** Pointer to memory. */
MemObject *mem;
+ /** Pointer to the sampler */
Sampler *sampler;
+ /** Counter of how many stages have completed switching out. */
int switchCount;
- // List of all ExecContexts.
+ /** Pointers to all of the threads in the CPU. */
std::vector<Thread *> thread;
#if 0
diff --git a/src/cpu/o3/cpu_policy.hh b/src/cpu/o3/cpu_policy.hh
index 4ea4daee6..32a0adcf1 100644
--- a/src/cpu/o3/cpu_policy.hh
+++ b/src/cpu/o3/cpu_policy.hh
@@ -50,24 +50,50 @@
#include "cpu/o3/comm.hh"
+/**
+ * Struct that defines the key classes to be used by the CPU. All
+ * classes use the typedefs defined here to determine what are the
+ * classes of the other stages and communication buffers. In order to
+ * change a structure such as the IQ, simply change the typedef here
+ * to use the desired class instead, and recompile. In order to
+ * create a different CPU to be used simultaneously with this one, see
+ * the alpha_impl.hh file for instructions.
+ */
template<class Impl>
struct SimpleCPUPolicy
{
- typedef TwobitBPredUnit<Impl> BPredUnit;
+ /** Typedef for the branch prediction unit (which includes the BP,
+ * RAS, and BTB).
+ */
+ typedef BPredUnit<Impl> BPredUnit;
+ /** Typedef for the register file. Most classes assume a unified
+ * physical register file.
+ */
typedef PhysRegFile<Impl> RegFile;
+ /** Typedef for the freelist of registers. */
typedef SimpleFreeList FreeList;
+ /** Typedef for the rename map. */
typedef SimpleRenameMap RenameMap;
+ /** Typedef for the ROB. */
typedef ROB<Impl> ROB;
+ /** Typedef for the instruction queue/scheduler. */
typedef InstructionQueue<Impl> IQ;
+ /** Typedef for the memory dependence unit. */
typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
+ /** Typedef for the LSQ. */
typedef LSQ<Impl> LSQ;
+ /** Typedef for the thread-specific LSQ units. */
typedef LSQUnit<Impl> LSQUnit;
-
+ /** Typedef for fetch. */
typedef DefaultFetch<Impl> Fetch;
+ /** Typedef for decode. */
typedef DefaultDecode<Impl> Decode;
+ /** Typedef for rename. */
typedef DefaultRename<Impl> Rename;
+ /** Typedef for Issue/Execute/Writeback. */
typedef DefaultIEW<Impl> IEW;
+ /** Typedef for commit. */
typedef DefaultCommit<Impl> Commit;
/** The struct for communication between fetch and decode. */
diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index 8abe1d480..ff88358d6 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -109,9 +109,12 @@ class DefaultDecode
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
+ /** Switches out the decode stage. */
void switchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
+
/** Ticks decode, processing all input signals and decoding as many
* instructions as possible.
*/
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index b03daff11..64b04bc3d 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -43,6 +43,7 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
{
_status = Inactive;
+ // Setup status, make sure stall signals are clear.
for (int i = 0; i < numThreads; ++i) {
decodeStatus[i] = Idle;
@@ -167,6 +168,7 @@ template <class Impl>
void
DefaultDecode<Impl>::switchOut()
{
+ // Decode can immediately switch out.
cpu->signalSwitched();
}
@@ -176,6 +178,7 @@ DefaultDecode<Impl>::takeOverFrom()
{
_status = Inactive;
+ // Be sure to reset state and clear out any old instructions.
for (int i = 0; i < numThreads; ++i) {
decodeStatus[i] = Idle;
@@ -224,22 +227,22 @@ DefaultDecode<Impl>::block(unsigned tid)
{
DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
- // If the decode status is blocked or unblocking then decode has not yet
- // signalled fetch to unblock. In that case, there is no need to tell
- // fetch to block.
- if (decodeStatus[tid] != Blocked &&
- decodeStatus[tid] != Unblocking) {
- toFetch->decodeBlock[tid] = true;
- wroteToTimeBuffer = true;
- }
-
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
skidInsert(tid);
+ // If the decode status is blocked or unblocking then decode has not yet
+ // signalled fetch to unblock. In that case, there is no need to tell
+ // fetch to block.
if (decodeStatus[tid] != Blocked) {
// Set the status to Blocked.
decodeStatus[tid] = Blocked;
+
+ if (decodeStatus[tid] != Unblocking) {
+ toFetch->decodeBlock[tid] = true;
+ wroteToTimeBuffer = true;
+ }
+
return true;
}
@@ -272,13 +275,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
"detected at decode.\n", tid);
+ // Send back mispredict information.
toFetch->decodeInfo[tid].branchMispredict = true;
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].squash = true;
toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
- toFetch->decodeInfo[tid].branchTaken = true;
+ toFetch->decodeInfo[tid].branchTaken =
+ inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
+ // Might have to tell fetch to unblock.
if (decodeStatus[tid] == Blocked ||
decodeStatus[tid] == Unblocking) {
toFetch->decodeUnblock[tid] = 1;
@@ -294,11 +300,12 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
}
}
+ // Clear the instruction list and skid buffer in case they have any
+ // insts in them.
while (!insts[tid].empty()) {
insts[tid].pop();
}
- // Clear the skid buffer in case it has any data in it.
while (!skidBuffer[tid].empty()) {
skidBuffer[tid].pop();
}
@@ -343,11 +350,12 @@ DefaultDecode<Impl>::squash(unsigned tid)
}
}
+ // Clear the instruction list and skid buffer in case they have any
+ // insts in them.
while (!insts[tid].empty()) {
insts[tid].pop();
}
- // Clear the skid buffer in case it has any data in it.
while (!skidBuffer[tid].empty()) {
skidBuffer[tid].pop();
}
@@ -723,6 +731,7 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
+ inst->setPredTarg(inst->branchTarget());
break;
}
diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh
index f8ae38da4..b6c5f1ab1 100644
--- a/src/cpu/o3/dep_graph.hh
+++ b/src/cpu/o3/dep_graph.hh
@@ -4,6 +4,7 @@
#include "cpu/o3/comm.hh"
+/** Node in a linked list. */
template <class DynInstPtr>
class DependencyEntry
{
@@ -18,32 +19,50 @@ class DependencyEntry
DependencyEntry<DynInstPtr> *next;
};
+/** Array of linked list that maintains the dependencies between
+ * producing instructions and consuming instructions. Each linked
+ * list represents a single physical register, having the future
+ * producer of the register's value, and all consumers waiting on that
+ * value on the list. The head node of each linked list represents
+ * the producing instruction of that register. Instructions are put
+ * on the list upon reaching the IQ, and are removed from the list
+ * either when the producer completes, or the instruction is squashed.
+*/
template <class DynInstPtr>
class DependencyGraph
{
public:
typedef DependencyEntry<DynInstPtr> DepEntry;
+ /** Default construction. Must call resize() prior to use. */
DependencyGraph()
: numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
{ }
+ /** Resize the dependency graph to have num_entries registers. */
void resize(int num_entries);
+ /** Clears all of the linked lists. */
void reset();
+ /** Inserts an instruction to be dependent on the given index. */
void insert(PhysRegIndex idx, DynInstPtr &new_inst);
+ /** Sets the producing instruction of a given register. */
void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
{ dependGraph[idx].inst = new_inst; }
+ /** Clears the producing instruction. */
void clearInst(PhysRegIndex idx)
{ dependGraph[idx].inst = NULL; }
+ /** Removes an instruction from a single linked list. */
void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
+ /** Removes and returns the newest dependent of a specific register. */
DynInstPtr pop(PhysRegIndex idx);
+ /** Checks if there are any dependents on a specific register. */
bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
/** Debugging function to dump out the dependency graph.
@@ -59,13 +78,16 @@ class DependencyGraph
*/
DepEntry *dependGraph;
+ /** Number of linked lists; identical to the number of registers. */
int numEntries;
// Debug variable, remove when done testing.
unsigned memAllocCounter;
public:
+ // Debug variable, remove when done testing.
uint64_t nodesTraversed;
+ // Debug variable, remove when done testing.
uint64_t nodesRemoved;
};
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 3c4fc7d93..23328c534 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -46,7 +46,7 @@ class Sampler;
* width is specified by the parameters; each cycle it tries to fetch
* that many instructions. It supports using a branch predictor to
* predict direction and targets.
- * It supports the idling functionalitiy of the CPU by indicating to
+ * It supports the idling functionality of the CPU by indicating to
* the CPU when it is active and inactive.
*/
template <class Impl>
@@ -172,14 +172,19 @@ class DefaultFetch
/** Processes cache completion event. */
void processCacheCompletion(PacketPtr pkt);
+ /** Begins the switch out of the fetch stage. */
void switchOut();
+ /** Completes the switch out of the fetch stage. */
void doSwitchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
+ /** Checks if the fetch stage is switched out. */
bool isSwitchedOut() { return switchedOut; }
+ /** Tells fetch to wake up from a quiesce instruction. */
void wakeFromQuiesce();
private:
@@ -312,8 +317,10 @@ class DefaultFetch
/** BPredUnit. */
BPredUnit branchPred;
+ /** Per-thread fetch PC. */
Addr PC[Impl::MaxThreads];
+ /** Per-thread next PC. */
Addr nextPC[Impl::MaxThreads];
/** Memory packet used to access cache. */
@@ -380,8 +387,12 @@ class DefaultFetch
/** Thread ID being fetched. */
int threadFetched;
+ /** Checks if there is an interrupt pending. If there is, fetch
+ * must stop once it is not fetching PAL instructions.
+ */
bool interruptPending;
+ /** Records if fetch is switched out. */
bool switchedOut;
#if !FULL_SYSTEM
@@ -405,17 +416,23 @@ class DefaultFetch
* the pipeline.
*/
Stats::Scalar<> fetchIdleCycles;
+ /** Total number of cycles spent blocked. */
Stats::Scalar<> fetchBlockedCycles;
-
+ /** Total number of cycles spent in any other state. */
Stats::Scalar<> fetchMiscStallCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
-
+ /** Total number of outstanding icache accesses that were dropped
+ * due to a squash.
+ */
Stats::Scalar<> fetchIcacheSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution<> fetchNisnDist;
+ /** Rate of how often fetch was idle. */
Stats::Formula idleRate;
+ /** Number of branch fetches per cycle. */
Stats::Formula branchRate;
+ /** Number of instruction fetched per cycle. */
Stats::Formula fetchRate;
};
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 5d3164dbf..69c43a6a2 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -188,59 +188,59 @@ void
DefaultFetch<Impl>::regStats()
{
icacheStallCycles
- .name(name() + ".FETCH:icacheStallCycles")
+ .name(name() + ".icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles);
fetchedInsts
- .name(name() + ".FETCH:Insts")
+ .name(name() + ".Insts")
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
fetchedBranches
- .name(name() + ".FETCH:Branches")
+ .name(name() + ".Branches")
.desc("Number of branches that fetch encountered")
.prereq(fetchedBranches);
predictedBranches
- .name(name() + ".FETCH:predictedBranches")
+ .name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches);
fetchCycles
- .name(name() + ".FETCH:Cycles")
+ .name(name() + ".Cycles")
.desc("Number of cycles fetch has run and was not squashing or"
" blocked")
.prereq(fetchCycles);
fetchSquashCycles
- .name(name() + ".FETCH:SquashCycles")
+ .name(name() + ".SquashCycles")
.desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles);
fetchIdleCycles
- .name(name() + ".FETCH:IdleCycles")
+ .name(name() + ".IdleCycles")
.desc("Number of cycles fetch was idle")
.prereq(fetchIdleCycles);
fetchBlockedCycles
- .name(name() + ".FETCH:BlockedCycles")
+ .name(name() + ".BlockedCycles")
.desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles);
fetchedCacheLines
- .name(name() + ".FETCH:CacheLines")
+ .name(name() + ".CacheLines")
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetchMiscStallCycles
- .name(name() + ".FETCH:MiscStallCycles")
+ .name(name() + ".MiscStallCycles")
.desc("Number of cycles fetch has spent waiting on interrupts, or "
"bad addresses, or out of MSHRs")
.prereq(fetchMiscStallCycles);
fetchIcacheSquashes
- .name(name() + ".FETCH:IcacheSquashes")
+ .name(name() + ".IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
@@ -248,24 +248,24 @@ DefaultFetch<Impl>::regStats()
.init(/* base value */ 0,
/* last value */ fetchWidth,
/* bucket size */ 1)
- .name(name() + ".FETCH:rateDist")
+ .name(name() + ".rateDist")
.desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf);
idleRate
- .name(name() + ".FETCH:idleRate")
+ .name(name() + ".idleRate")
.desc("Percent of cycles fetch was idle")
.prereq(idleRate);
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
branchRate
- .name(name() + ".FETCH:branchRate")
+ .name(name() + ".branchRate")
.desc("Number of branch fetches per cycle")
.flags(Stats::total);
- branchRate = predictedBranches / cpu->numCycles;
+ branchRate = fetchedBranches / cpu->numCycles;
fetchRate
- .name(name() + ".FETCH:rate")
+ .name(name() + ".rate")
.desc("Number of inst fetches per cycle")
.flags(Stats::total);
fetchRate = fetchedInsts / cpu->numCycles;
@@ -337,6 +337,7 @@ template<class Impl>
void
DefaultFetch<Impl>::initStage()
{
+ // Setup PC and nextPC with initial state.
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
@@ -353,8 +354,6 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
// Only change the status if it's still waiting on the icache access
// to return.
- // Can keep track of how many cache accesses go unused due to
- // misspeculation here.
if (fetchStatus[tid] != IcacheWaitResponse ||
pkt != memPkt[tid] ||
isSwitchedOut()) {
@@ -391,6 +390,7 @@ template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
+ // Fetch is ready to switch out at any time.
switchedOut = true;
cpu->signalSwitched();
}
@@ -399,6 +399,7 @@ template <class Impl>
void
DefaultFetch<Impl>::doSwitchOut()
{
+ // Branch predictor needs to have its state cleared.
branchPred.switchOut();
}
@@ -429,6 +430,7 @@ DefaultFetch<Impl>::wakeFromQuiesce()
{
DPRINTF(Fetch, "Waking up from quiesce\n");
// Hopefully this is safe
+ // @todo: Allow other threads to wake from quiesce.
fetchStatus[0] = Running;
}
@@ -1213,7 +1215,7 @@ DefaultFetch<Impl>::lsqCount()
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
- fetchStatus[high_pri] == Idle)
+ fetchStatus[high_pri] == Idle)
return high_pri;
else
PQ.pop();
diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc
index fb2b5c00d..b28b5d37f 100644
--- a/src/cpu/o3/fu_pool.cc
+++ b/src/cpu/o3/fu_pool.cc
@@ -183,6 +183,8 @@ FUPool::getUnit(OpClass capability)
}
}
+ assert(fu_idx < numFU);
+
unitBusy[fu_idx] = true;
return fu_idx;
diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh
index f590c4149..1d4c76690 100644
--- a/src/cpu/o3/fu_pool.hh
+++ b/src/cpu/o3/fu_pool.hh
@@ -155,7 +155,10 @@ class FUPool : public SimObject
return maxIssueLatencies[capability];
}
+ /** Switches out functional unit pool. */
void switchOut();
+
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
};
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index ae86536c9..7e79d5311 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -143,12 +143,16 @@ class DefaultIEW
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *sb_ptr);
+ /** Starts switch out of IEW stage. */
void switchOut();
+ /** Completes switch out of IEW stage. */
void doSwitchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
+ /** Returns if IEW is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Sets page table pointer within LSQ. */
@@ -270,6 +274,7 @@ class DefaultIEW
void tick();
private:
+ /** Updates execution stats based on the instruction. */
void updateExeInstStats(DynInstPtr &inst);
/** Pointer to main time buffer used for backwards communication. */
@@ -412,6 +417,7 @@ class DefaultIEW
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;
+ /** Is this stage switched out. */
bool switchedOut;
/** Stat for total number of idle cycles. */
@@ -453,9 +459,13 @@ class DefaultIEW
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
+ /** Number of executed software prefetches. */
Stats::Vector<> exeSwp;
+ /** Number of executed nops. */
Stats::Vector<> exeNop;
+ /** Number of executed meomory references. */
Stats::Vector<> exeRefs;
+ /** Number of executed branches. */
Stats::Vector<> exeBranches;
// Stats::Vector<> issued_ops;
@@ -465,19 +475,30 @@ class DefaultIEW
Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
*/
+ /** Number of instructions issued per cycle. */
Stats::Formula issueRate;
+ /** Number of executed store instructions. */
Stats::Formula iewExecStoreInsts;
// Stats::Formula issue_op_rate;
// Stats::Formula fu_busy_rate;
-
+ /** Number of instructions sent to commit. */
Stats::Vector<> iewInstsToCommit;
+ /** Number of instructions that writeback. */
Stats::Vector<> writebackCount;
+ /** Number of instructions that wake consumers. */
Stats::Vector<> producerInst;
+ /** Number of instructions that wake up from producers. */
Stats::Vector<> consumerInst;
+ /** Number of instructions that were delayed in writing back due
+ * to resource contention.
+ */
Stats::Vector<> wbPenalized;
+ /** Number of instructions per cycle written back. */
Stats::Formula wbRate;
+ /** Average number of woken instructions per writeback. */
Stats::Formula wbFanout;
+ /** Number of instructions per cycle delayed in writing back . */
Stats::Formula wbPenalizedRate;
};
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 6b61b1b0e..23f101517 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -383,6 +383,7 @@ template <class Impl>
void
DefaultIEW<Impl>::switchOut()
{
+ // IEW is ready to switch out at any time.
cpu->signalSwitched();
}
@@ -390,6 +391,7 @@ template <class Impl>
void
DefaultIEW<Impl>::doSwitchOut()
{
+ // Clear any state.
switchedOut = true;
instQueue.switchOut();
@@ -408,6 +410,7 @@ template <class Impl>
void
DefaultIEW<Impl>::takeOverFrom()
{
+ // Reset all state.
_status = Active;
exeStatus = Running;
wbStatus = Idle;
@@ -521,6 +524,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readPC();
+ // Must include the broadcasted SN in the squash.
toCommit->includeSquashInst[tid] = true;
ldstQueue.setLoadBlockedHandled(tid);
@@ -1054,6 +1058,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
// Store conditionals need to be set as "canCommit()"
// so that commit can process them when they reach the
// head of commit.
+ // @todo: This is somewhat specific to Alpha.
inst->setCanCommit();
instQueue.insertNonSpec(inst);
add_to_iq = false;
@@ -1313,6 +1318,7 @@ DefaultIEW<Impl>::executeInsts()
}
}
+ // Update and record activity if we processed any instructions.
if (inst_num) {
if (exeStatus == Idle) {
exeStatus = Running;
@@ -1363,8 +1369,10 @@ DefaultIEW<Impl>::writebackInsts()
scoreboard->setReg(inst->renamedDestRegIdx(i));
}
- producerInst[tid]++;
- consumerInst[tid]+= dependents;
+ if (dependents) {
+ producerInst[tid]++;
+ consumerInst[tid]+= dependents;
+ }
writebackCount[tid]++;
}
}
@@ -1435,6 +1443,7 @@ DefaultIEW<Impl>::tick()
DPRINTF(IEW,"Processing [tid:%i]\n",tid);
+ // Update structures based on instructions committed.
if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
!fromCommit->commitInfo[tid].squash &&
!fromCommit->commitInfo[tid].robSquashing) {
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index 245601ccf..60a713020 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -94,6 +94,9 @@ class InstructionQueue
/** Pointer back to the instruction queue. */
InstructionQueue<Impl> *iqPtr;
+ /** Should the FU be added to the list to be freed upon
+ * completing this event.
+ */
bool freeFU;
public:
@@ -118,6 +121,7 @@ class InstructionQueue
/** Registers statistics. */
void regStats();
+ /** Resets all instruction queue state. */
void resetState();
/** Sets CPU pointer. */
@@ -135,10 +139,13 @@ class InstructionQueue
/** Sets the global time buffer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+ /** Switches out the instruction queue. */
void switchOut();
+ /** Takes over execution from another CPU's thread. */
void takeOverFrom();
+ /** Returns if the IQ is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Number of entries needed for given amount of threads. */
@@ -173,6 +180,9 @@ class InstructionQueue
*/
void insertBarrier(DynInstPtr &barr_inst);
+ /** Returns the oldest scheduled instruction, and removes it from
+ * the list of instructions waiting to execute.
+ */
DynInstPtr getInstToExecute();
/**
@@ -276,13 +286,15 @@ class InstructionQueue
/** List of all the instructions in the IQ (some of which may be issued). */
std::list<DynInstPtr> instList[Impl::MaxThreads];
+ /** List of instructions that are ready to be executed. */
std::list<DynInstPtr> instsToExecute;
/**
- * Struct for comparing entries to be added to the priority queue. This
- * gives reverse ordering to the instructions in terms of sequence
- * numbers: the instructions with smaller sequence numbers (and hence
- * are older) will be at the top of the priority queue.
+ * Struct for comparing entries to be added to the priority queue.
+ * This gives reverse ordering to the instructions in terms of
+ * sequence numbers: the instructions with smaller sequence
+ * numbers (and hence are older) will be at the top of the
+ * priority queue.
*/
struct pqCompare {
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
@@ -395,6 +407,7 @@ class InstructionQueue
*/
unsigned commitToIEWDelay;
+ /** Is the IQ switched out. */
bool switchedOut;
/** The sequence number of the squashed instruction. */
@@ -462,19 +475,28 @@ class InstructionQueue
*/
Stats::Scalar<> iqSquashedNonSpecRemoved;
+ /** Distribution of number of instructions in the queue. */
Stats::VectorDistribution<> queueResDist;
+ /** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
+ /** Distribution of the cycles it takes to issue an instruction. */
Stats::VectorDistribution<> issueDelayDist;
+ /** Number of times an instruction could not be issued because a
+ * FU was busy.
+ */
Stats::Vector<> statFuBusy;
// Stats::Vector<> dist_unissued;
+ /** Stat for total number issued for each instruction type. */
Stats::Vector2d<> statIssuedInstType;
+ /** Number of instructions issued per cycle. */
Stats::Formula issueRate;
// Stats::Formula issue_stores;
// Stats::Formula issue_op_rate;
- Stats::Vector<> fuBusy; //cumulative fu busy
-
+ /** Number of times the FU was busy. */
+ Stats::Vector<> fuBusy;
+ /** Number of times the FU was busy per instruction issued. */
Stats::Formula fuBusyRate;
};
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 34af8c641..2f03c6814 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -152,8 +152,10 @@ template <class Impl>
InstructionQueue<Impl>::~InstructionQueue()
{
dependGraph.reset();
+#ifdef DEBUG
cprintf("Nodes traversed: %i, removed: %i\n",
dependGraph.nodesTraversed, dependGraph.nodesRemoved);
+#endif
}
template <class Impl>
@@ -670,14 +672,8 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
// @todo: Ensure that these FU Completions happen at the beginning
// of a cycle, otherwise they could add too many instructions to
// the queue.
- // @todo: This could break if there's multiple multi-cycle ops
- // finishing on this cycle. Maybe implement something like
- // instToCommit in iew_impl.hh.
issueToExecuteQueue->access(0)->size++;
instsToExecute.push_back(inst);
-// int &size = issueToExecuteQueue->access(0)->size;
-
-// issueToExecuteQueue->access(0)->insts[size++] = inst;
}
// @todo: Figure out a better way to remove the squashed items from the
@@ -743,9 +739,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
}
+ // If we have an instruction that doesn't require a FU, or a
+ // valid FU, then schedule for execution.
if (idx == -2 || idx != -1) {
if (op_latency == 1) {
-// i2e_info->insts[exec_queue_slot++] = issuing_inst;
i2e_info->size++;
instsToExecute.push_back(issuing_inst);
@@ -763,14 +760,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// @todo: Enforce that issue_latency == 1 or op_latency
if (issue_latency > 1) {
+ // If FU isn't pipelined, then it must be freed
+ // upon the execution completing.
execution->setFreeFU();
} else {
- // @todo: Not sure I'm accounting for the
- // multi-cycle op in a pipelined FU properly, or
- // the number of instructions issued in one cycle.
-// i2e_info->insts[exec_queue_slot++] = issuing_inst;
-// i2e_info->size++;
-
// Add the FU onto the list of FU's to be freed next cycle.
fuPool->freeUnitNextCycle(idx);
}
@@ -815,6 +808,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
numIssuedDist.sample(total_issued);
iqInstsIssued+= total_issued;
+ // If we issued any instructions, tell the CPU we had activity.
if (total_issued) {
cpu->activityThisCycle();
} else {
@@ -1365,4 +1359,45 @@ InstructionQueue<Impl>::dumpInsts()
++num;
}
}
+
+ cprintf("Insts to Execute list:\n");
+
+ int num = 0;
+ int valid_num = 0;
+ ListIt inst_list_it = instsToExecute.begin();
+
+ while (inst_list_it != instsToExecute.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed
+ // still count towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
}
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 51eb23cd7..d65510c30 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -49,6 +49,7 @@ class LSQ {
typedef typename Impl::CPUPol::IEW IEW;
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
+ /** SMT policy. */
enum LSQPolicy {
Dynamic,
Partitioned,
@@ -69,8 +70,9 @@ class LSQ {
void setIEW(IEW *iew_ptr);
/** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr);
-
+ /** Switches out the LSQ. */
void switchOut();
+ /** Takes over execution from another CPU's thread. */
void takeOverFrom();
/** Number of entries needed for the given amount of threads.*/
@@ -95,9 +97,6 @@ class LSQ {
/** Executes a load. */
Fault executeLoad(DynInstPtr &inst);
- Fault executeLoad(int lq_idx, unsigned tid)
- { return thread[tid].executeLoad(lq_idx); }
-
/** Executes a store. */
Fault executeStore(DynInstPtr &inst);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index b339cea2c..393d8947d 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -87,10 +87,13 @@ class LSQUnit {
/** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr);
+ /** Switches out LSQ unit. */
void switchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
+ /** Returns if the LSQ is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Ticks the LSQ unit, which in this case only resets the number of
@@ -159,12 +162,15 @@ class LSQUnit {
bool loadBlocked()
{ return isLoadBlocked; }
+ /** Clears the signal that a load became blocked. */
void clearLoadBlocked()
{ isLoadBlocked = false; }
+ /** Returns if the blocked load was handled. */
bool isLoadBlockedHandled()
{ return loadBlockedHandled; }
+ /** Records the blocked load as being handled. */
void setLoadBlockedHandled()
{ loadBlockedHandled = true; }
@@ -339,6 +345,7 @@ class LSQUnit {
/** The number of used cache ports in this cycle. */
int usedPorts;
+ /** Is the LSQ switched out. */
bool switchedOut;
//list<InstSeqNum> mshrSeqNums;
@@ -358,8 +365,10 @@ class LSQUnit {
/** Whether or not a load is blocked due to the memory system. */
bool isLoadBlocked;
+ /** Has the blocked load been handled. */
bool loadBlockedHandled;
+ /** The sequence number of the blocked load. */
InstSeqNum blockedLoadSeqNum;
/** The oldest load that caused a memory ordering violation. */
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3f6af3d2c..1ad561dc0 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -78,8 +78,12 @@ LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
- if (lsqPtr->isSwitchedOut())
+ if (lsqPtr->isSwitchedOut()) {
+ if (wbEvent)
+ delete wbEvent;
+
return;
+ }
lsqPtr->cpu->wakeCPU();
@@ -500,7 +504,6 @@ LSQUnit<Impl>::commitLoad()
DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
loadQueue[loadHead]->readPC());
-
loadQueue[loadHead] = NULL;
incrLdIdx(loadHead);
diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh
index 4d763fca2..e399f0133 100644
--- a/src/cpu/o3/mem_dep_unit.hh
+++ b/src/cpu/o3/mem_dep_unit.hh
@@ -86,8 +86,10 @@ class MemDepUnit {
/** Registers statistics. */
void regStats();
+ /** Switches out the memory dependence predictor. */
void switchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
/** Sets the pointer to the IQ. */
@@ -157,10 +159,12 @@ class MemDepUnit {
: inst(new_inst), regsReady(false), memDepReady(false),
completed(false), squashed(false)
{
+#ifdef DEBUG
++memdep_count;
DPRINTF(MemDepUnit, "Memory dependency entry created. "
"memdep_count=%i\n", memdep_count);
+#endif
}
/** Frees any pointers. */
@@ -169,11 +173,12 @@ class MemDepUnit {
for (int i = 0; i < dependInsts.size(); ++i) {
dependInsts[i] = NULL;
}
-
+#ifdef DEBUG
--memdep_count;
DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
"memdep_count=%i\n", memdep_count);
+#endif
}
/** Returns the name of the memory dependence entry. */
@@ -198,9 +203,11 @@ class MemDepUnit {
bool squashed;
/** For debugging. */
+#ifdef DEBUG
static int memdep_count;
static int memdep_insert;
static int memdep_erase;
+#endif
};
/** Finds the memory dependence entry in the hash map. */
@@ -229,9 +236,13 @@ class MemDepUnit {
*/
MemDepPred depPred;
+ /** Is there an outstanding load barrier that loads must wait on. */
bool loadBarrier;
+ /** The sequence number of the load barrier. */
InstSeqNum loadBarrierSN;
+ /** Is there an outstanding store barrier that loads must wait on. */
bool storeBarrier;
+ /** The sequence number of the store barrier. */
InstSeqNum storeBarrierSN;
/** Pointer to the IQ. */
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index a4fed4b0d..50ad1e2c8 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -107,6 +107,7 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
+ // Clear any state.
for (int i = 0; i < Impl::MaxThreads; ++i) {
instList[i].clear();
}
@@ -118,6 +119,7 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
{
+ // Be sure to reset all state.
loadBarrier = storeBarrier = false;
loadBarrierSN = storeBarrierSN = 0;
depPred.clear();
@@ -148,7 +150,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
inst_entry->listIt = --(instList[tid].end());
// Check any barriers and the dependence predictor for any
- // producing stores.
+ // producing memrefs/stores.
InstSeqNum producing_store;
if (inst->isLoad() && loadBarrier) {
producing_store = loadBarrierSN;
@@ -255,6 +257,7 @@ void
MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
{
InstSeqNum barr_sn = barr_inst->seqNum;
+ // Memory barriers block loads and stores, write barriers only stores.
if (barr_inst->isMemBarrier()) {
loadBarrier = true;
loadBarrierSN = barr_sn;
@@ -332,6 +335,7 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
DynInstPtr temp_inst;
bool found_inst = false;
+ // For now this replay function replays all waiting memory ops.
while (!instsToReplay.empty()) {
temp_inst = instsToReplay.front();
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 626d7cc75..42fdf6bf5 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -157,10 +157,13 @@ class DefaultRename
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *_scoreboard);
+ /** Switches out the rename stage. */
void switchOut();
+ /** Completes the switch out. */
void doSwitchOut();
+ /** Takes over from another CPU's thread. */
void takeOverFrom();
/** Squashes all instructions in a thread. */
@@ -245,8 +248,10 @@ class DefaultRename
/** Checks if any stages are telling rename to block. */
bool checkStall(unsigned tid);
+ /** Gets the number of free entries for a specific thread. */
void readFreeEntries(unsigned tid);
+ /** Checks the signals and updates the status. */
bool checkSignalsAndUpdate(unsigned tid);
/** Either serializes on the next instruction available in the InstQueue,
@@ -456,8 +461,11 @@ class DefaultRename
Stats::Scalar<> renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
Stats::Scalar<> renameUndoneMaps;
+ /** Number of serialize instructions handled. */
Stats::Scalar<> renamedSerializing;
+ /** Number of instructions marked as temporarily serializing. */
Stats::Scalar<> renamedTempSerializing;
+ /** Number of instructions inserted into skid buffers. */
Stats::Scalar<> renameSkidInsts;
};
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 9cbe1a770..8e70c90f7 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -260,6 +260,7 @@ template <class Impl>
void
DefaultRename<Impl>::switchOut()
{
+ // Rename is ready to switch out at any time.
cpu->signalSwitched();
}
@@ -267,6 +268,7 @@ template <class Impl>
void
DefaultRename<Impl>::doSwitchOut()
{
+ // Clear any state, fix up the rename map.
for (int i = 0; i < numThreads; i++) {
typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index 0edb80684..c4c90c99a 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -64,12 +64,13 @@ class SimpleRenameMap
typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
public:
- //Constructor
- SimpleRenameMap() {};
+ /** Default constructor. init() must be called prior to use. */
+ SimpleRenameMap() {};
/** Destructor. */
~SimpleRenameMap();
+ /** Initializes rename map with given parameters. */
void init(unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
PhysRegIndex &_int_reg_start,
@@ -86,6 +87,7 @@ class SimpleRenameMap
int id,
bool bindRegs);
+ /** Sets the free list used with this rename map. */
void setFreeList(SimpleFreeList *fl_ptr);
//Tell rename map to get a free physical register for a given
@@ -151,7 +153,6 @@ class SimpleRenameMap
{ }
};
- //Change this to private
private:
/** Integer rename map. */
std::vector<RenameEntry> intRenameMap;
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 3786a0355..6d1402531 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -97,8 +97,10 @@ class ROB
*/
void setActiveThreads(std::list<unsigned>* at_ptr);
+ /** Switches out the ROB. */
void switchOut();
+ /** Takes over another CPU's thread. */
void takeOverFrom();
/** Function to insert an instruction into the ROB. Note that whatever
@@ -300,6 +302,7 @@ class ROB
/** Number of instructions in the ROB. */
int numInstsInROB;
+ /** Dummy instruction returned if there are no insts left. */
DynInstPtr dummyInst;
private:
diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc
index 720d5da53..0023cee36 100644
--- a/src/cpu/o3/store_set.cc
+++ b/src/cpu/o3/store_set.cc
@@ -28,6 +28,7 @@
* Authors: Kevin Lim
*/
+#include "base/intmath.hh"
#include "base/trace.hh"
#include "cpu/o3/store_set.hh"
@@ -38,6 +39,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
SSITSize, LFSTSize);
+ if (!isPowerOf2(SSITSize)) {
+ fatal("Invalid SSIT size!\n");
+ }
+
SSIT.resize(SSITSize);
validSSIT.resize(SSITSize);
@@ -45,6 +50,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
for (int i = 0; i < SSITSize; ++i)
validSSIT[i] = false;
+ if (!isPowerOf2(LFSTSize)) {
+ fatal("Invalid LFST size!\n");
+ }
+
LFST.resize(LFSTSize);
validLFST.resize(LFSTSize);
@@ -320,3 +329,19 @@ StoreSet::clear()
storeList.clear();
}
+
+void
+StoreSet::dump()
+{
+ cprintf("storeList.size(): %i\n", storeList.size());
+ SeqNumMapIt store_list_it = storeList.begin();
+
+ int num = 0;
+
+ while (store_list_it != storeList.end()) {
+ cprintf("%i: [sn:%lli] SSID:%i\n",
+ num, (*store_list_it).first, (*store_list_it).second);
+ num++;
+ store_list_it++;
+ }
+}
diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh
index 64255c51a..f5a44a1ac 100644
--- a/src/cpu/o3/store_set.hh
+++ b/src/cpu/o3/store_set.hh
@@ -46,58 +46,98 @@ struct ltseqnum {
}
};
+/**
+ * Implements a store set predictor for determining if memory
+ * instructions are dependent upon each other. See paper "Memory
+ * Dependence Prediction using Store Sets" by Chrysos and Emer. SSID
+ * stands for Store Set ID, SSIT stands for Store Set ID Table, and
+ * LFST is Last Fetched Store Table.
+ */
class StoreSet
{
public:
typedef unsigned SSID;
public:
+ /** Default constructor. init() must be called prior to use. */
StoreSet() { };
+ /** Creates store set predictor with given table sizes. */
StoreSet(int SSIT_size, int LFST_size);
+ /** Default destructor. */
~StoreSet();
+ /** Initializes the store set predictor with the given table sizes. */
void init(int SSIT_size, int LFST_size);
+ /** Records a memory ordering violation between the younger load
+ * and the older store. */
void violation(Addr store_PC, Addr load_PC);
+ /** Inserts a load into the store set predictor. This does nothing but
+ * is included in case other predictors require a similar function.
+ */
void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
+ /** Inserts a store into the store set predictor. Updates the
+ * LFST if the store has a valid SSID. */
void insertStore(Addr store_PC, InstSeqNum store_seq_num,
unsigned tid);
+ /** Checks if the instruction with the given PC is dependent upon
+ * any store. @return Returns the sequence number of the store
+ * instruction this PC is dependent upon. Returns 0 if none.
+ */
InstSeqNum checkInst(Addr PC);
+ /** Records this PC/sequence number as issued. */
void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
+ /** Squashes for a specific thread until the given sequence number. */
void squash(InstSeqNum squashed_num, unsigned tid);
+ /** Resets all tables. */
void clear();
+ /** Debug function to dump the contents of the store list. */
+ void dump();
+
private:
+ /** Calculates the index into the SSIT based on the PC. */
inline int calcIndex(Addr PC)
{ return (PC >> offsetBits) & indexMask; }
+ /** Calculates a Store Set ID based on the PC. */
inline SSID calcSSID(Addr PC)
{ return ((PC ^ (PC >> 10)) % LFSTSize); }
+ /** The Store Set ID Table. */
std::vector<SSID> SSIT;
+ /** Bit vector to tell if the SSIT has a valid entry. */
std::vector<bool> validSSIT;
+ /** Last Fetched Store Table. */
std::vector<InstSeqNum> LFST;
+ /** Bit vector to tell if the LFST has a valid entry. */
std::vector<bool> validLFST;
+ /** Map of stores that have been inserted into the store set, but
+ * not yet issued or squashed.
+ */
std::map<InstSeqNum, int, ltseqnum> storeList;
typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
+ /** Store Set ID Table size, in entries. */
int SSITSize;
+ /** Last Fetched Store Table size, in entries. */
int LFSTSize;
+ /** Mask to obtain the index. */
int indexMask;
// HACK: Hardcoded for now.
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index 9101eafb9..dfb1530d0 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -58,16 +58,26 @@ struct O3ThreadState : public ThreadState {
typedef ExecContext::Status Status;
typedef typename Impl::FullCPU FullCPU;
+ /** Current status of the thread. */
Status _status;
- // Current instruction
+ /** Current instruction the thread is committing. Only set and
+ * used for DTB faults currently.
+ */
TheISA::MachInst inst;
+
private:
+ /** Pointer to the CPU. */
FullCPU *cpu;
public:
-
+ /** Whether or not the thread is currently in syscall mode, and
+ * thus able to be externally updated without squashing.
+ */
bool inSyscall;
+ /** Whether or not the thread is currently waiting on a trap, and
+ * thus able to be externally updated without squashing.
+ */
bool trapPending;
#if FULL_SYSTEM
@@ -88,23 +98,34 @@ struct O3ThreadState : public ThreadState {
{ }
#endif
+ /** Pointer to the ExecContext of this thread. @todo: Don't call
+ this a proxy.*/
ExecContext *xcProxy;
+ /** Returns a pointer to the XC of this thread. */
ExecContext *getXCProxy() { return xcProxy; }
+ /** Returns the status of this thread. */
Status status() const { return _status; }
+ /** Sets the status of this thread. */
void setStatus(Status new_status) { _status = new_status; }
- bool misspeculating() { return false; }
-
+ /** Sets the current instruction being committed. */
void setInst(TheISA::MachInst _inst) { inst = _inst; }
+ /** Reads the number of instructions functionally executed and
+ * committed.
+ */
Counter readFuncExeInst() { return funcExeInst; }
+ /** Sets the total number of instructions functionally executed
+ * and committed.
+ */
void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
#if !FULL_SYSTEM
+ /** Handles the syscall. */
void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); }
#endif
};
diff --git a/src/cpu/o3/tournament_pred.cc b/src/cpu/o3/tournament_pred.cc
index 361ef4770..7cf78dcb1 100644
--- a/src/cpu/o3/tournament_pred.cc
+++ b/src/cpu/o3/tournament_pred.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,7 @@
* Authors: Kevin Lim
*/
+#include "base/intmath.hh"
#include "cpu/o3/tournament_pred.hh"
TournamentBP::TournamentBP(unsigned _localPredictorSize,
@@ -51,7 +52,9 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
choiceCtrBits(_choiceCtrBits),
instShiftAmt(_instShiftAmt)
{
- //Should do checks here to make sure sizes are correct (powers of 2)
+ if (!isPowerOf2(localPredictorSize)) {
+ fatal("Invalid local predictor size!\n");
+ }
//Setup the array of counters for the local predictor
localCtrs.resize(localPredictorSize);
@@ -59,6 +62,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
+ if (!isPowerOf2(localHistoryTableSize)) {
+ fatal("Invalid local history table size!\n");
+ }
+
//Setup the history table for the local table
localHistoryTable.resize(localHistoryTableSize);
@@ -68,6 +75,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
// Setup the local history mask
localHistoryMask = (1 << localHistoryBits) - 1;
+ if (!isPowerOf2(globalPredictorSize)) {
+ fatal("Invalid global predictor size!\n");
+ }
+
//Setup the array of counters for the global predictor
globalCtrs.resize(globalPredictorSize);
@@ -79,12 +90,17 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
// Setup the global history mask
globalHistoryMask = (1 << globalHistoryBits) - 1;
+ if (!isPowerOf2(choicePredictorSize)) {
+ fatal("Invalid choice predictor size!\n");
+ }
+
//Setup the array of counters for the choice predictor
choiceCtrs.resize(choicePredictorSize);
for (int i = 0; i < choicePredictorSize; ++i)
choiceCtrs[i].setBits(choiceCtrBits);
+ // @todo: Allow for different thresholds between the predictors.
threshold = (1 << (localCtrBits - 1)) - 1;
threshold = threshold / 2;
}
@@ -93,165 +109,185 @@ inline
unsigned
TournamentBP::calcLocHistIdx(Addr &branch_addr)
{
+ // Get low order bits after removing instruction offset.
return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
}
inline
void
-TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
+TournamentBP::updateGlobalHistTaken()
{
globalHistory = (globalHistory << 1) | 1;
globalHistory = globalHistory & globalHistoryMask;
-
- localHistoryTable[local_history_idx] =
- (localHistoryTable[local_history_idx] << 1) | 1;
}
inline
void
-TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
+TournamentBP::updateGlobalHistNotTaken()
{
globalHistory = (globalHistory << 1);
globalHistory = globalHistory & globalHistoryMask;
+}
+inline
+void
+TournamentBP::updateLocalHistTaken(unsigned local_history_idx)
+{
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1) | 1;
+}
+
+inline
+void
+TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx)
+{
localHistoryTable[local_history_idx] =
(localHistoryTable[local_history_idx] << 1);
}
bool
-TournamentBP::lookup(Addr &branch_addr)
+TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
{
- uint8_t local_prediction;
+ bool local_prediction;
unsigned local_history_idx;
unsigned local_predictor_idx;
- uint8_t global_prediction;
- uint8_t choice_prediction;
+ bool global_prediction;
+ bool choice_prediction;
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
- local_prediction = localCtrs[local_predictor_idx].read();
+ local_prediction = localCtrs[local_predictor_idx].read() > threshold;
//Lookup in the global predictor to get its branch prediction
- global_prediction = globalCtrs[globalHistory].read();
+ global_prediction = globalCtrs[globalHistory].read() > threshold;
//Lookup in the choice predictor to see which one to use
- choice_prediction = choiceCtrs[globalHistory].read();
-
- //@todo Put a threshold value in for the three predictors that can
- // be set through the constructor (so this isn't hard coded).
- //Also should put some of this code into functions.
- if (choice_prediction > threshold) {
- if (global_prediction > threshold) {
- updateHistoriesTaken(local_history_idx);
-
- assert(globalHistory < globalPredictorSize &&
- local_history_idx < localPredictorSize);
-
- globalCtrs[globalHistory].increment();
- localCtrs[local_history_idx].increment();
-
+ choice_prediction = choiceCtrs[globalHistory].read() > threshold;
+
+ // Create BPHistory and pass it back to be recorded.
+ BPHistory *history = new BPHistory;
+ history->globalHistory = globalHistory;
+ history->localPredTaken = local_prediction;
+ history->globalPredTaken = global_prediction;
+ history->globalUsed = choice_prediction;
+ bp_history = (void *)history;
+
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
+
+ // Commented code is for doing speculative update of counters and
+ // all histories.
+ if (choice_prediction) {
+ if (global_prediction) {
+// updateHistoriesTaken(local_history_idx);
+// globalCtrs[globalHistory].increment();
+// localCtrs[local_history_idx].increment();
+ updateGlobalHistTaken();
return true;
} else {
- updateHistoriesNotTaken(local_history_idx);
-
- assert(globalHistory < globalPredictorSize &&
- local_history_idx < localPredictorSize);
-
- globalCtrs[globalHistory].decrement();
- localCtrs[local_history_idx].decrement();
-
+// updateHistoriesNotTaken(local_history_idx);
+// globalCtrs[globalHistory].decrement();
+// localCtrs[local_history_idx].decrement();
+ updateGlobalHistNotTaken();
return false;
}
} else {
- if (local_prediction > threshold) {
- updateHistoriesTaken(local_history_idx);
-
- assert(globalHistory < globalPredictorSize &&
- local_history_idx < localPredictorSize);
-
- globalCtrs[globalHistory].increment();
- localCtrs[local_history_idx].increment();
-
+ if (local_prediction) {
+// updateHistoriesTaken(local_history_idx);
+// globalCtrs[globalHistory].increment();
+// localCtrs[local_history_idx].increment();
+ updateGlobalHistTaken();
return true;
} else {
- updateHistoriesNotTaken(local_history_idx);
-
- assert(globalHistory < globalPredictorSize &&
- local_history_idx < localPredictorSize);
-
- globalCtrs[globalHistory].decrement();
- localCtrs[local_history_idx].decrement();
-
+// updateHistoriesNotTaken(local_history_idx);
+// globalCtrs[globalHistory].decrement();
+// localCtrs[local_history_idx].decrement();
+ updateGlobalHistNotTaken();
return false;
}
}
}
-// Update the branch predictor if it predicted a branch wrong.
void
-TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
+TournamentBP::uncondBr(void * &bp_history)
{
+ // Create BPHistory and pass it back to be recorded.
+ BPHistory *history = new BPHistory;
+ history->globalHistory = globalHistory;
+ history->localPredTaken = true;
+ history->globalPredTaken = true;
+ bp_history = static_cast<void *>(history);
+
+ updateGlobalHistTaken();
+}
- uint8_t local_prediction;
+void
+TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
+{
unsigned local_history_idx;
unsigned local_predictor_idx;
- bool local_pred_taken;
+ unsigned local_predictor_hist;
- uint8_t global_prediction;
- bool global_pred_taken;
-
- // Load the correct global history into the register.
- globalHistory = correct_gh;
-
- // Get the local predictor's current prediction, remove the incorrect
- // update, and update the local predictor
+ // Get the local predictor's current prediction
local_history_idx = calcLocHistIdx(branch_addr);
- local_predictor_idx = localHistoryTable[local_history_idx];
- local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
-
- local_prediction = localCtrs[local_predictor_idx].read();
- local_pred_taken = local_prediction > threshold;
-
- //Get the global predictor's current prediction, and update the
- //global predictor
- global_prediction = globalCtrs[globalHistory].read();
- global_pred_taken = global_prediction > threshold;
-
- //Update the choice predictor to tell it which one was correct
- if (local_pred_taken != global_pred_taken) {
- //If the local prediction matches the actual outcome, decerement
- //the counter. Otherwise increment the counter.
- if (local_pred_taken == taken) {
- choiceCtrs[globalHistory].decrement();
- } else {
- choiceCtrs[globalHistory].increment();
+ local_predictor_hist = localHistoryTable[local_history_idx];
+ local_predictor_idx = local_predictor_hist & localHistoryMask;
+
+ // Update the choice predictor to tell it which one was correct if
+ // there was a prediction.
+ if (bp_history) {
+ BPHistory *history = static_cast<BPHistory *>(bp_history);
+ if (history->localPredTaken != history->globalPredTaken) {
+ // If the local prediction matches the actual outcome,
+ // decerement the counter. Otherwise increment the
+ // counter.
+ if (history->localPredTaken == taken) {
+ choiceCtrs[globalHistory].decrement();
+ } else if (history->globalPredTaken == taken){
+ choiceCtrs[globalHistory].increment();
+ }
}
+
+ // We're done with this history, now delete it.
+ delete history;
}
- if (taken) {
- assert(globalHistory < globalPredictorSize &&
- local_predictor_idx < localPredictorSize);
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
+ // Update the counters and local history with the proper
+ // resolution of the branch. Global history is updated
+ // speculatively and restored upon squash() calls, so it does not
+ // need to be updated.
+ if (taken) {
localCtrs[local_predictor_idx].increment();
globalCtrs[globalHistory].increment();
- globalHistory = (globalHistory << 1) | 1;
- globalHistory = globalHistory & globalHistoryMask;
-
- localHistoryTable[local_history_idx] |= 1;
+ updateLocalHistTaken(local_history_idx);
} else {
- assert(globalHistory < globalPredictorSize &&
- local_predictor_idx < localPredictorSize);
-
localCtrs[local_predictor_idx].decrement();
globalCtrs[globalHistory].decrement();
- globalHistory = (globalHistory << 1);
- globalHistory = globalHistory & globalHistoryMask;
-
- localHistoryTable[local_history_idx] &= ~1;
+ updateLocalHistNotTaken(local_history_idx);
}
}
+
+void
+TournamentBP::squash(void *bp_history)
+{
+ BPHistory *history = static_cast<BPHistory *>(bp_history);
+
+ // Restore global history to state prior to this branch.
+ globalHistory = history->globalHistory;
+
+ // Delete this BPHistory now that we're done with it.
+ delete history;
+}
+
+#ifdef DEBUG
+int
+TournamentBP::BPHistory::newCount = 0;
+#endif
diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh
index e16600090..92402adc6 100644
--- a/src/cpu/o3/tournament_pred.hh
+++ b/src/cpu/o3/tournament_pred.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,15 @@
#include "cpu/o3/sat_counter.hh"
#include <vector>
+/**
+ * Implements a tournament branch predictor, hopefully identical to the one
+ * used in the 21264. It has a local predictor, which uses a local history
+ * table to index into a table of counters, and a global predictor, which
+ * uses a global history to index into a table of counters. A choice
+ * predictor chooses between the two. Only the global history register
+ * is speculatively updated, the rest are updated upon branches committing
+ * or misspeculating.
+ */
class TournamentBP
{
public:
@@ -55,30 +64,95 @@ class TournamentBP
/**
* Looks up the given address in the branch predictor and returns
- * a true/false value as to whether it is taken.
+ * a true/false value as to whether it is taken. Also creates a
+ * BPHistory object to store any state it will need on squash/update.
* @param branch_addr The address of the branch to look up.
+ * @param bp_history Pointer that will be set to the BPHistory object.
* @return Whether or not the branch is taken.
*/
- bool lookup(Addr &branch_addr);
+ bool lookup(Addr &branch_addr, void * &bp_history);
+
+ /**
+ * Records that there was an unconditional branch, and modifies
+ * the bp history to point to an object that has the previous
+ * global history stored in it.
+ * @param bp_history Pointer that will be set to the BPHistory object.
+ */
+ void uncondBr(void * &bp_history);
/**
* Updates the branch predictor with the actual result of a branch.
* @param branch_addr The address of the branch to update.
* @param taken Whether or not the branch was taken.
+ * @param bp_history Pointer to the BPHistory object that was created
+ * when the branch was predicted.
+ */
+ void update(Addr &branch_addr, bool taken, void *bp_history);
+
+ /**
+ * Restores the global branch history on a squash.
+ * @param bp_history Pointer to the BPHistory object that has the
+ * previous global branch history in it.
*/
- void update(Addr &branch_addr, unsigned global_history, bool taken);
+ void squash(void *bp_history);
+ /** Returns the global history. */
inline unsigned readGlobalHist() { return globalHistory; }
private:
-
+ /**
+ * Returns if the branch should be taken or not, given a counter
+ * value.
+ * @param count The counter value.
+ */
inline bool getPrediction(uint8_t &count);
+ /**
+ * Returns the local history index, given a branch address.
+ * @param branch_addr The branch's PC address.
+ */
inline unsigned calcLocHistIdx(Addr &branch_addr);
- inline void updateHistoriesTaken(unsigned local_history_idx);
+ /** Updates global history as taken. */
+ inline void updateGlobalHistTaken();
- inline void updateHistoriesNotTaken(unsigned local_history_idx);
+ /** Updates global history as not taken. */
+ inline void updateGlobalHistNotTaken();
+
+ /**
+ * Updates local histories as taken.
+ * @param local_history_idx The local history table entry that
+ * will be updated.
+ */
+ inline void updateLocalHistTaken(unsigned local_history_idx);
+
+ /**
+ * Updates local histories as not taken.
+ * @param local_history_idx The local history table entry that
+ * will be updated.
+ */
+ inline void updateLocalHistNotTaken(unsigned local_history_idx);
+
+ /**
+ * The branch history information that is created upon predicting
+ * a branch. It will be passed back upon updating and squashing,
+ * when the BP can use this information to update/restore its
+ * state properly.
+ */
+ struct BPHistory {
+#ifdef DEBUG
+ BPHistory()
+ { newCount++; }
+ ~BPHistory()
+ { newCount--; }
+
+ static int newCount;
+#endif
+ unsigned globalHistory;
+ bool localPredTaken;
+ bool globalPredTaken;
+ bool globalUsed;
+ };
/** Local counters. */
std::vector<SatCounter> localCtrs;
@@ -103,7 +177,6 @@ class TournamentBP
/** Mask to get the proper local history. */
unsigned localHistoryMask;
-
/** Array of counters that make up the global predictor. */
std::vector<SatCounter> globalCtrs;
@@ -122,7 +195,6 @@ class TournamentBP
/** Mask to get the proper global history. */
unsigned globalHistoryMask;
-
/** Array of counters that make up the choice predictor. */
std::vector<SatCounter> choiceCtrs;