summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--SConscript4
-rw-r--r--cpu/base_cpu.cc10
-rw-r--r--cpu/base_dyn_inst.cc8
-rw-r--r--cpu/base_dyn_inst.hh26
-rw-r--r--cpu/beta_cpu/2bit_local_pred.cc24
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst.hh13
-rw-r--r--cpu/beta_cpu/alpha_full_cpu.hh142
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_builder.cc63
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_impl.hh25
-rw-r--r--cpu/beta_cpu/alpha_params.hh23
-rw-r--r--cpu/beta_cpu/bpred_unit.cc3
-rw-r--r--cpu/beta_cpu/bpred_unit.hh64
-rw-r--r--cpu/beta_cpu/bpred_unit_impl.hh242
-rw-r--r--cpu/beta_cpu/btb.cc6
-rw-r--r--cpu/beta_cpu/comm.hh22
-rw-r--r--cpu/beta_cpu/commit.hh13
-rw-r--r--cpu/beta_cpu/commit_impl.hh127
-rw-r--r--cpu/beta_cpu/cpu_policy.hh2
-rw-r--r--cpu/beta_cpu/decode.hh11
-rw-r--r--cpu/beta_cpu/decode_impl.hh87
-rw-r--r--cpu/beta_cpu/fetch.hh55
-rw-r--r--cpu/beta_cpu/fetch_impl.hh537
-rw-r--r--cpu/beta_cpu/free_list.hh8
-rw-r--r--cpu/beta_cpu/full_cpu.cc46
-rw-r--r--cpu/beta_cpu/full_cpu.hh5
-rw-r--r--cpu/beta_cpu/iew.hh31
-rw-r--r--cpu/beta_cpu/iew_impl.hh406
-rw-r--r--cpu/beta_cpu/inst_queue.cc3
-rw-r--r--cpu/beta_cpu/inst_queue.hh36
-rw-r--r--cpu/beta_cpu/inst_queue_impl.hh190
-rw-r--r--cpu/beta_cpu/mem_dep_unit.hh92
-rw-r--r--cpu/beta_cpu/mem_dep_unit_impl.hh300
-rw-r--r--cpu/beta_cpu/ras.cc42
-rw-r--r--cpu/beta_cpu/ras.hh40
-rw-r--r--cpu/beta_cpu/regfile.hh12
-rw-r--r--cpu/beta_cpu/rename.hh18
-rw-r--r--cpu/beta_cpu/rename_impl.hh195
-rw-r--r--cpu/beta_cpu/rename_map.cc4
-rw-r--r--cpu/beta_cpu/rob_impl.hh4
-rw-r--r--cpu/beta_cpu/store_set.cc68
-rw-r--r--cpu/beta_cpu/store_set.hh2
-rw-r--r--cpu/beta_cpu/tournament_pred.cc243
-rw-r--r--cpu/beta_cpu/tournament_pred.hh160
43 files changed, 2681 insertions, 731 deletions
diff --git a/SConscript b/SConscript
index fb2b40325..8a9b99cb5 100644
--- a/SConscript
+++ b/SConscript
@@ -106,10 +106,12 @@ base_sources = Split('''
cpu/beta_cpu/inst_queue.cc
cpu/beta_cpu/ldstq.cc
cpu/beta_cpu/mem_dep_unit.cc
+ cpu/beta_cpu/ras.cc
cpu/beta_cpu/rename.cc
cpu/beta_cpu/rename_map.cc
cpu/beta_cpu/rob.cc
cpu/beta_cpu/store_set.cc
+ cpu/beta_cpu/tournament_pred.cc
cpu/fast_cpu/fast_cpu.cc
cpu/full_cpu/bpred.cc
cpu/full_cpu/commit.cc
@@ -481,7 +483,7 @@ env.Append(CPPPATH='.')
# Debug binary
debug = env.Copy(OBJSUFFIX='.do')
-debug.Append(CCFLAGS=Split('-g -gstabs+ -O0'))
+debug.Append(CCFLAGS=Split('-g -gstabs+ -O0 -lefence'))
debug.Append(CPPDEFINES='DEBUG')
debug.Program(target = 'm5.debug', source = make_objs(sources, debug))
diff --git a/cpu/base_cpu.cc b/cpu/base_cpu.cc
index 3ee7a3892..988c7a602 100644
--- a/cpu/base_cpu.cc
+++ b/cpu/base_cpu.cc
@@ -37,6 +37,8 @@
#include "sim/param.hh"
#include "sim/sim_events.hh"
+#include "base/trace.hh"
+
using namespace std;
vector<BaseCPU *> BaseCPU::cpuList;
@@ -46,6 +48,7 @@ vector<BaseCPU *> BaseCPU::cpuList;
// been initialized
int maxThreadsPerCPU = 1;
+extern void debug_break();
#ifdef FULL_SYSTEM
BaseCPU::BaseCPU(const string &_name, int _number_of_threads,
Counter max_insts_any_thread,
@@ -64,9 +67,16 @@ BaseCPU::BaseCPU(const string &_name, int _number_of_threads,
: SimObject(_name), number_of_threads(_number_of_threads)
#endif
{
+ DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this);
+
+ debug_break();
+
// add self to global list of CPUs
cpuList.push_back(this);
+ DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n",
+ this);
+
if (number_of_threads > maxThreadsPerCPU)
maxThreadsPerCPU = number_of_threads;
diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc
index c527eb08b..74f6b8a6c 100644
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@@ -83,7 +83,7 @@ BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC,
seqNum = seq_num;
- specMemWrite = false;
+// specMemWrite = false;
canIssue = false;
issued = false;
@@ -95,7 +95,7 @@ BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC,
blockingInst = false;
recoverInst = false;
specMode = false;
- btbMissed = false;
+// btbMissed = false;
// Eventually make this a parameter.
threadNumber = 0;
// Also make this a parameter.
@@ -139,12 +139,12 @@ BaseDynInst<Impl>::BaseDynInst(StaticInstPtr<ISA> &_staticInst)
effAddr = MemReq::inval_addr;
physEffAddr = MemReq::inval_addr;
- specMemWrite = false;
+// specMemWrite = false;
blockingInst = false;
recoverInst = false;
specMode = false;
- btbMissed = false;
+// btbMissed = false;
// Make sure to have the renamed register entries set to the same
// as the normal register entries. It will allow the IQ to work
diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh
index fe30b5195..171721e61 100644
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@@ -146,7 +146,10 @@ class BaseDynInst : public FastAlloc, public RefCounted
bool threadsyncWait;
/** If the BTB missed. */
- bool btbMissed;
+// bool btbMissed;
+
+ /** The global history of this instruction (branch). */
+// unsigned globalHistory;
/** The thread this instruction is from. */
short threadNumber;
@@ -212,7 +215,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
static int instcount;
/** Did this instruction do a spec write? */
- bool specMemWrite;
+// bool specMemWrite;
private:
/** Physical register index of the destination registers of this
@@ -287,15 +290,22 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Returns whether the instruction was predicted taken or not. */
bool predTaken() {
-// DPRINTF(FullCPU, "PC: %08p\n", PC);
-// DPRINTF(FullCPU, "predPC: %08p\n", predPC);
-
return( predPC != (PC + sizeof(MachInst) ) );
}
/** Returns whether the instruction mispredicted. */
bool mispredicted() { return (predPC != nextPC); }
+/*
+ unsigned readGlobalHist() {
+ return globalHistory;
+ }
+
+ void setGlobalHist(unsigned history) {
+ globalHistory = history;
+ }
+*/
+
//
// Instruction types. Forward checks to StaticInst object.
//
@@ -452,7 +462,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
OpClass opClass() const { return staticInst->opClass(); }
/** Returns whether or not the BTB missed. */
- bool btbMiss() const { return btbMissed; }
+// bool btbMiss() const { return btbMissed; }
/** Returns the branch target address. */
Addr branchTarget() const { return staticInst->branchTarget(PC); }
@@ -579,8 +589,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
storeSize = sizeof(T);
storeData = data;
- if (specMode)
- specMemWrite = true;
+// if (specMode)
+// specMemWrite = true;
MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);
diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc
index 88c39a9b0..ef7f23d49 100644
--- a/cpu/beta_cpu/2bit_local_pred.cc
+++ b/cpu/beta_cpu/2bit_local_pred.cc
@@ -75,18 +75,34 @@ DefaultBP::getLocalIndex(Addr &branch_addr)
bool
DefaultBP::lookup(Addr &branch_addr)
{
+ bool taken;
uint8_t local_prediction;
unsigned local_predictor_idx = getLocalIndex(branch_addr);
DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
local_predictor_idx);
+ assert(local_predictor_idx < localPredictorSize);
+
local_prediction = localCtrs[local_predictor_idx].read();
DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
(int)local_prediction);
- return getPrediction(local_prediction);
+ taken = getPrediction(local_prediction);
+
+#if 0
+ // Speculative update.
+ if (taken) {
+ DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
+ localCtrs[local_predictor_idx].increment();
+ } else {
+ DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
+ localCtrs[local_predictor_idx].decrement();
+ }
+#endif
+
+ return taken;
}
void
@@ -100,11 +116,17 @@ DefaultBP::update(Addr &branch_addr, bool taken)
DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
local_predictor_idx);
+ assert(local_predictor_idx < localPredictorSize);
+
+ // Increment or decrement twice to undo speculative update, then
+ // properly update
if (taken) {
DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
localCtrs[local_predictor_idx].increment();
+// localCtrs[local_predictor_idx].increment();
} else {
DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
localCtrs[local_predictor_idx].decrement();
+// localCtrs[local_predictor_idx].decrement();
}
}
diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh
index 4e1cebd11..c964762db 100644
--- a/cpu/beta_cpu/alpha_dyn_inst.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst.hh
@@ -19,19 +19,19 @@ template <class Impl>
class AlphaDynInst : public BaseDynInst<Impl>
{
public:
- // Typedef for the CPU.
+ /** Typedef for the CPU. */
typedef typename Impl::FullCPU FullCPU;
- //Typedef to get the ISA.
+ /** Typedef to get the ISA. */
typedef typename Impl::ISA ISA;
- /// Binary machine instruction type.
+ /** Binary machine instruction type. */
typedef typename ISA::MachInst MachInst;
- /// Memory address type.
+ /** Memory address type. */
typedef typename ISA::Addr Addr;
- /// Logical register index type.
+ /** Logical register index type. */
typedef typename ISA::RegIndex RegIndex;
- /// Integer register index type.
+ /** Integer register index type. */
typedef typename ISA::IntReg IntReg;
enum {
@@ -54,6 +54,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
return fault;
}
+ public:
uint64_t readUniq();
void setUniq(uint64_t val);
diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh
index 0e094b122..e01eba3bf 100644
--- a/cpu/beta_cpu/alpha_full_cpu.hh
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@@ -29,6 +29,8 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
#endif
public:
+ void regStats();
+
#ifdef FULL_SYSTEM
bool inPalMode();
@@ -66,14 +68,17 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
return No_Fault;
}
+
Fault translateInstReq(MemReqPtr &req)
{
return dummyTranslation(req);
}
+
Fault translateDataReadReq(MemReqPtr &req)
{
return dummyTranslation(req);
}
+
Fault translateDataWriteReq(MemReqPtr &req)
{
return dummyTranslation(req);
@@ -81,73 +86,6 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
#endif
- template <class T>
- Fault read(MemReqPtr &req, T &data)
- {
-#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM)
- if (req->flags & LOCKED) {
- MiscRegFile *cregs = &req->xc->regs.miscRegs;
- cregs->lock_addr = req->paddr;
- cregs->lock_flag = true;
- }
-#endif
-
- Fault error;
- error = mem->read(req, data);
- data = htoa(data);
- return error;
- }
-
- template <class T>
- Fault write(MemReqPtr &req, T &data)
- {
-#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM)
-
- MiscRegFile *cregs;
-
- // If this is a store conditional, act appropriately
- if (req->flags & LOCKED) {
- cregs = &xc->regs.miscRegs;
-
- if (req->flags & UNCACHEABLE) {
- // Don't update result register (see stq_c in isa_desc)
- req->result = 2;
- req->xc->storeCondFailures = 0;//Needed? [RGD]
- } else {
- req->result = cregs->lock_flag;
- if (!cregs->lock_flag ||
- ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) {
- cregs->lock_flag = false;
- if (((++req->xc->storeCondFailures) % 100000) == 0) {
- std::cerr << "Warning: "
- << req->xc->storeCondFailures
- << " consecutive store conditional failures "
- << "on cpu " << cpu_id
- << std::endl;
- }
- return No_Fault;
- }
- else req->xc->storeCondFailures = 0;
- }
- }
-
- // Need to clear any locked flags on other proccessors for
- // this address. Only do this for succsful Store Conditionals
- // and all other stores (WH64?). Unsuccessful Store
- // Conditionals would have returned above, and wouldn't fall
- // through.
- for (int i = 0; i < system->execContexts.size(); i++){
- cregs = &system->execContexts[i]->regs.miscRegs;
- if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) {
- cregs->lock_flag = false;
- }
- }
-
-#endif
-
- return mem->write(req, (T)htoa(data));
- }
-
// Later on may want to remove this misc stuff from the regfile and
// have it handled at this level. Might prove to be an issue when
// trying to rename source/destination registers...
@@ -240,6 +178,76 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
// Called by initCPU. Implement as I please.
void initIPRs(RegFile *regs);
#endif
+
+
+ template <class T>
+ Fault read(MemReqPtr &req, T &data)
+ {
+#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM)
+ if (req->flags & LOCKED) {
+ MiscRegFile *cregs = &req->xc->regs.miscRegs;
+ cregs->lock_addr = req->paddr;
+ cregs->lock_flag = true;
+ }
+#endif
+
+ Fault error;
+ error = mem->read(req, data);
+ data = htoa(data);
+ return error;
+ }
+
+
+ template <class T>
+ Fault write(MemReqPtr &req, T &data)
+ {
+#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM)
+
+ MiscRegFile *cregs;
+
+ // If this is a store conditional, act appropriately
+ if (req->flags & LOCKED) {
+ cregs = &xc->regs.miscRegs;
+
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ req->xc->storeCondFailures = 0;//Needed? [RGD]
+ } else {
+ req->result = cregs->lock_flag;
+ if (!cregs->lock_flag ||
+ ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+ cregs->lock_flag = false;
+ if (((++req->xc->storeCondFailures) % 100000) == 0) {
+ std::cerr << "Warning: "
+ << req->xc->storeCondFailures
+ << " consecutive store conditional failures "
+ << "on cpu " << cpu_id
+ << std::endl;
+ }
+ return No_Fault;
+ }
+ else req->xc->storeCondFailures = 0;
+ }
+ }
+
+ // Need to clear any locked flags on other proccessors for
+ // this address. Only do this for succsful Store Conditionals
+ // and all other stores (WH64?). Unsuccessful Store
+ // Conditionals would have returned above, and wouldn't fall
+ // through.
+ for (int i = 0; i < system->execContexts.size(); i++){
+ cregs = &system->execContexts[i]->regs.miscRegs;
+ if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) {
+ cregs->lock_flag = false;
+ }
+ }
+
+#endif
+
+ return mem->write(req, (T)htoa(data));
+ }
+
};
#endif // __ALPHA_FULL_CPU_HH__
diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc
index 5fe96d656..f37081232 100644
--- a/cpu/beta_cpu/alpha_full_cpu_builder.cc
+++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc
@@ -81,17 +81,38 @@ Param<unsigned> issueWidth;
Param<unsigned> executeWidth;
Param<unsigned> executeIntWidth;
Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
Param<unsigned> iewToCommitDelay;
Param<unsigned> renameToROBDelay;
Param<unsigned> commitWidth;
Param<unsigned> squashWidth;
+#if 0
Param<unsigned> localPredictorSize;
Param<unsigned> localPredictorCtrBits;
+#endif
+Param<unsigned> local_predictor_size;
+Param<unsigned> local_ctr_bits;
+Param<unsigned> local_history_table_size;
+Param<unsigned> local_history_bits;
+Param<unsigned> global_predictor_size;
+Param<unsigned> global_ctr_bits;
+Param<unsigned> global_history_bits;
+Param<unsigned> choice_predictor_size;
+Param<unsigned> choice_ctr_bits;
+
Param<unsigned> BTBEntries;
Param<unsigned> BTBTagSize;
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
Param<unsigned> numPhysIntRegs;
Param<unsigned> numPhysFloatRegs;
Param<unsigned> numIQEntries;
@@ -168,6 +189,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
INIT_PARAM(executeWidth, "Execute width"),
INIT_PARAM(executeIntWidth, "Integer execute width"),
INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+ INIT_PARAM(executeBranchWidth, "Branch execute width"),
+ INIT_PARAM(executeMemoryWidth, "Memory execute width"),
INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
"delay"),
@@ -175,12 +198,30 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
INIT_PARAM(commitWidth, "Commit width"),
INIT_PARAM(squashWidth, "Squash width"),
+#if 0
INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. "
"Must be a power of 2."),
INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"),
+#endif
+ INIT_PARAM(local_predictor_size, "Size of local predictor"),
+ INIT_PARAM(local_ctr_bits, "Bits per counter"),
+ INIT_PARAM(local_history_table_size, "Size of local history table"),
+ INIT_PARAM(local_history_bits, "Bits for the local history"),
+ INIT_PARAM(global_predictor_size, "Size of global predictor"),
+ INIT_PARAM(global_ctr_bits, "Bits per counter"),
+ INIT_PARAM(global_history_bits, "Bits of history"),
+ INIT_PARAM(choice_predictor_size, "Size of choice predictor"),
+ INIT_PARAM(choice_ctr_bits, "Bits of choice counters"),
+
INIT_PARAM(BTBEntries, "Number of BTB entries"),
INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+ INIT_PARAM(RASSize, "RAS size"),
+
+ INIT_PARAM(LQEntries, "Number of load queue entries"),
+ INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM(LFSTSize, "Last fetched store table size"),
+ INIT_PARAM(SSITSize, "Store set ID table size"),
INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
@@ -277,17 +318,37 @@ CREATE_SIM_OBJECT(BaseFullCPU)
params.executeWidth = executeWidth;
params.executeIntWidth = executeIntWidth;
params.executeFloatWidth = executeFloatWidth;
+ params.executeBranchWidth = executeBranchWidth;
+ params.executeMemoryWidth = executeMemoryWidth;
params.iewToCommitDelay = iewToCommitDelay;
params.renameToROBDelay = renameToROBDelay;
params.commitWidth = commitWidth;
params.squashWidth = squashWidth;
-
+#if 0
params.localPredictorSize = localPredictorSize;
params.localPredictorCtrBits = localPredictorCtrBits;
+#endif
+ params.local_predictor_size = local_predictor_size;
+ params.local_ctr_bits = local_ctr_bits;
+ params.local_history_table_size = local_history_table_size;
+ params.local_history_bits = local_history_bits;
+ params.global_predictor_size = global_predictor_size;
+ params.global_ctr_bits = global_ctr_bits;
+ params.global_history_bits = global_history_bits;
+ params.choice_predictor_size = choice_predictor_size;
+ params.choice_ctr_bits = choice_ctr_bits;
+
params.BTBEntries = BTBEntries;
params.BTBTagSize = BTBTagSize;
+ params.RASSize = RASSize;
+
+ params.LQEntries = LQEntries;
+ params.SQEntries = SQEntries;
+ params.SSITSize = SSITSize;
+ params.LFSTSize = LFSTSize;
+
params.numPhysIntRegs = numPhysIntRegs;
params.numPhysFloatRegs = numPhysFloatRegs;
params.numIQEntries = numIQEntries;
diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh
index 8bfc0777e..ee8f9f33b 100644
--- a/cpu/beta_cpu/alpha_full_cpu_impl.hh
+++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh
@@ -27,6 +27,19 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params &params)
rob.setCPU(this);
}
+template <class Impl>
+void
+AlphaFullCPU<Impl>::regStats()
+{
+ // Register stats for everything that has stats.
+ fullCPURegStats();
+ fetch.regStats();
+ decode.regStats();
+ rename.regStats();
+ iew.regStats();
+ commit.regStats();
+}
+
#ifndef FULL_SYSTEM
template <class Impl>
@@ -92,6 +105,14 @@ AlphaFullCPU<Impl>::squashStages()
rob.squash(rob_head);
commit.setSquashing();
+
+ // Now hack the time buffer to clear the sequence numbers in the places
+ // where the stages might read it.?
+ for (int i = 0; i < 5; ++i)
+ {
+ timeBuffer.access(-i)->commitInfo.doneSeqNum = 0;
+ }
+
}
#endif // FULL_SYSTEM
@@ -178,7 +199,7 @@ template <class Impl>
uint64_t *
AlphaFullCPU<Impl>::getIpr()
{
- return regs.ipr;
+ return regFile.getIpr();
}
template <class Impl>
@@ -564,7 +585,7 @@ AlphaFullCPU<Impl>::setIntrFlag(int val)
regs.intrflag = val;
}
-// Maybe have this send back from IEW stage to squash and update PC.
+// Can force commit stage to squash and stuff.
template <class Impl>
Fault
AlphaFullCPU<Impl>::hwrei()
diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh
index 92dfd35f5..ecde4b016 100644
--- a/cpu/beta_cpu/alpha_params.hh
+++ b/cpu/beta_cpu/alpha_params.hh
@@ -72,6 +72,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params
unsigned executeWidth;
unsigned executeIntWidth;
unsigned executeFloatWidth;
+ unsigned executeBranchWidth;
+ unsigned executeMemoryWidth;
//
// Commit
@@ -84,11 +86,26 @@ class AlphaSimpleParams : public BaseFullCPU::Params
//
// Branch predictor (BP & BTB)
//
+/*
unsigned localPredictorSize;
unsigned localPredictorCtrBits;
+*/
+
+ unsigned local_predictor_size;
+ unsigned local_ctr_bits;
+ unsigned local_history_table_size;
+ unsigned local_history_bits;
+ unsigned global_predictor_size;
+ unsigned global_ctr_bits;
+ unsigned global_history_bits;
+ unsigned choice_predictor_size;
+ unsigned choice_ctr_bits;
+
unsigned BTBEntries;
unsigned BTBTagSize;
+ unsigned RASSize;
+
//
// Load store queue
//
@@ -96,6 +113,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params
unsigned SQEntries;
//
+ // Memory dependence
+ //
+ unsigned SSITSize;
+ unsigned LFSTSize;
+
+ //
// Miscellaneous
//
unsigned numPhysIntRegs;
diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc
index 6de2def44..c4a79fbbe 100644
--- a/cpu/beta_cpu/bpred_unit.cc
+++ b/cpu/beta_cpu/bpred_unit.cc
@@ -1,5 +1,6 @@
#include "cpu/beta_cpu/bpred_unit_impl.hh"
#include "cpu/beta_cpu/alpha_impl.hh"
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
-template DefaultBPredUnit<AlphaSimpleImpl>;
+template TwobitBPredUnit<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh
index 71191f5b7..53c7146c5 100644
--- a/cpu/beta_cpu/bpred_unit.hh
+++ b/cpu/beta_cpu/bpred_unit.hh
@@ -4,9 +4,15 @@
// For Addr type.
#include "arch/alpha/isa_traits.hh"
+#include "base/statistics.hh"
+#include "cpu/inst_seq.hh"
#include "cpu/beta_cpu/2bit_local_pred.hh"
+#include "cpu/beta_cpu/tournament_pred.hh"
#include "cpu/beta_cpu/btb.hh"
+#include "cpu/beta_cpu/ras.hh"
+
+#include <list>
/**
* Basically a wrapper class to hold both the branch predictor
@@ -18,34 +24,86 @@
* object, and be able to call the constructors on the BP and BTB.
*/
template<class Impl>
-class DefaultBPredUnit
+class TwobitBPredUnit
{
public:
typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ TwobitBPredUnit(Params &params);
+
+ void regStats();
+
+ bool predict(DynInstPtr &inst, Addr &PC);
+
+ void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
+ bool actually_taken);
- DefaultBPredUnit(Params &params);
+ void squash(const InstSeqNum &squashed_sn);
+
+ void update(const InstSeqNum &done_sn);
bool BPLookup(Addr &inst_PC)
{ return BP.lookup(inst_PC); }
+ unsigned BPReadGlobalHist()
+ { return 0; }
+
bool BTBValid(Addr &inst_PC)
{ return BTB.valid(inst_PC); }
Addr BTBLookup(Addr &inst_PC)
{ return BTB.lookup(inst_PC); }
- void BPUpdate(Addr &inst_PC, bool taken)
+ // Will want to include global history.
+ void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken)
{ BP.update(inst_PC, taken); }
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
{ BTB.update(inst_PC, target_PC); }
private:
+ struct PredictorHistory {
+ PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC,
+ const bool pred_taken)
+ : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken),
+ globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0),
+ RASTarget(0)
+ { }
+
+ InstSeqNum seqNum;
+
+ Addr PC;
+
+ bool predTaken;
+
+ unsigned globalHistory;
+
+ bool usedRAS;
+
+ bool wasCall;
+
+ unsigned RASIndex;
+
+ Addr RASTarget;
+ };
+
+ std::list<PredictorHistory> predHist;
DefaultBP BP;
DefaultBTB BTB;
+ ReturnAddrStack RAS;
+
+ Stats::Scalar<> lookups;
+ Stats::Scalar<> condPredicted;
+ Stats::Scalar<> condIncorrect;
+ Stats::Scalar<> BTBLookups;
+ Stats::Scalar<> BTBHits;
+ Stats::Scalar<> BTBCorrect;
+ Stats::Scalar<> usedRAS;
+ Stats::Scalar<> RASIncorrect;
};
#endif // __BPRED_UNIT_HH__
diff --git a/cpu/beta_cpu/bpred_unit_impl.hh b/cpu/beta_cpu/bpred_unit_impl.hh
index 47415ce9b..02c613d34 100644
--- a/cpu/beta_cpu/bpred_unit_impl.hh
+++ b/cpu/beta_cpu/bpred_unit_impl.hh
@@ -1,13 +1,247 @@
#include "cpu/beta_cpu/bpred_unit.hh"
+#include "base/traceflags.hh"
+#include "base/trace.hh"
template<class Impl>
-DefaultBPredUnit<Impl>::DefaultBPredUnit(Params &params)
- : BP(params.localPredictorSize,
- params.localPredictorCtrBits,
+TwobitBPredUnit<Impl>::TwobitBPredUnit(Params &params)
+ : BP(params.local_predictor_size,
+ params.local_ctr_bits,
params.instShiftAmt),
BTB(params.BTBEntries,
params.BTBTagSize,
- params.instShiftAmt)
+ params.instShiftAmt),
+ RAS(params.RASSize)
{
}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::regStats()
+{
+ lookups
+ .name(name() + ".BPredUnit.lookups")
+ .desc("Number of BP lookups")
+ ;
+
+ condPredicted
+ .name(name() + ".BPredUnit.condPredicted")
+ .desc("Number of conditional branches predicted")
+ ;
+
+ condIncorrect
+ .name(name() + ".BPredUnit.condIncorrect")
+ .desc("Number of conditional branches incorrect")
+ ;
+
+ BTBLookups
+ .name(name() + ".BPredUnit.BTBLookups")
+ .desc("Number of BTB lookups")
+ ;
+
+ BTBHits
+ .name(name() + ".BPredUnit.BTBHits")
+ .desc("Number of BTB hits")
+ ;
+
+ BTBCorrect
+ .name(name() + ".BPredUnit.BTBCorrect")
+ .desc("Number of correct BTB predictions (this stat may not "
+ "work properly.")
+ ;
+
+ usedRAS
+ .name(name() + ".BPredUnit.usedRAS")
+ .desc("Number of times the RAS was used.")
+ ;
+
+ RASIncorrect
+ .name(name() + ".BPredUnit.RASInCorrect")
+ .desc("Number of incorrect RAS predictions.")
+ ;
+}
+
+template <class Impl>
+bool
+TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC)
+{
+ // See if branch predictor predicts taken.
+ // If so, get its target addr either from the BTB or the RAS.
+ // Once that's done, speculatively update the predictor?
+ // Save off record of branch stuff so the RAS can be fixed
+ // up once it's done.
+
+ bool pred_taken = false;
+ Addr target;
+
+ ++lookups;
+
+ if (inst->isUncondCtrl()) {
+ DPRINTF(Fetch, "BranchPred: Unconditional control.\n");
+ pred_taken = true;
+ } else {
+ ++condPredicted;
+
+ pred_taken = BPLookup(PC);
+
+ DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x"
+ "\n", pred_taken, inst->readPC());
+ }
+
+ PredictorHistory predict_record(inst->seqNum, PC, pred_taken);
+
+ // Now lookup in the BTB or RAS.
+ if (pred_taken) {
+ if (inst->isReturn()) {
+ ++usedRAS;
+
+ // If it's a function return call, then look up the address
+ // in the RAS.
+ target = RAS.top();
+
+ // Record the top entry of the RAS, and its index.
+ predict_record.usedRAS = true;
+ predict_record.RASIndex = RAS.topIdx();
+ predict_record.RASTarget = target;
+
+ RAS.pop();
+
+ DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS "
+ "predicted target: %#x, RAS index: %i.\n",
+ inst->readPC(), target, predict_record.RASIndex);
+ } else {
+ ++BTBLookups;
+
+ if (inst->isCall()) {
+ RAS.push(PC+sizeof(MachInst));
+
+ // Record that it was a call so that the top RAS entry can
+ // be popped off if the speculation is incorrect.
+ predict_record.wasCall = true;
+
+ DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, "
+ "adding %#x to the RAS.\n",
+ inst->readPC(), PC+sizeof(MachInst));
+ }
+
+ if (BTB.valid(PC)) {
+ ++BTBHits;
+
+ //If it's anything else, use the BTB to get the target addr.
+ target = BTB.lookup(PC);
+
+ DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target "
+ "is %#x.\n", inst->readPC(), target);
+
+ } else {
+ DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry."
+ "\n");
+ pred_taken = false;
+ }
+
+ }
+ }
+
+ if (pred_taken) {
+ // Set the PC and the instruction's predicted target.
+ PC = target;
+ inst->setPredTarg(target);
+ } else {
+ PC = PC + sizeof(MachInst);
+ inst->setPredTarg(PC);
+ }
+
+ predHist.push_front(predict_record);
+
+ assert(!predHist.empty());
+
+ return pred_taken;
+}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn)
+{
+ DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number "
+ "%i.\n", done_sn);
+
+ while (!predHist.empty() && predHist.back().seqNum <= done_sn) {
+ assert(!predHist.empty());
+
+ // Update the branch predictor with the correct results of branches.
+ BP.update(predHist.back().PC, predHist.back().predTaken);
+
+ predHist.pop_back();
+ }
+}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn)
+{
+ while (!predHist.empty() && predHist.front().seqNum > squashed_sn) {
+ if (predHist.front().usedRAS) {
+ DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, "
+ "target: %#x.\n",
+ predHist.front().RASIndex,
+ predHist.front().RASTarget);
+
+ RAS.restore(predHist.front().RASIndex,
+ predHist.front().RASTarget);
+ } else if (predHist.front().wasCall) {
+ DPRINTF(Fetch, "BranchPred: Removing speculative entry added "
+ "to the RAS.\n");
+
+ RAS.pop();
+ }
+
+ predHist.pop_front();
+ }
+}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
+ const Addr &corr_target,
+ const bool actually_taken)
+{
+ // Now that we know that a branch was mispredicted, we need to undo
+ // all the branches that have been seen up until this branch and
+ // fix up everything.
+
+ ++condIncorrect;
+
+ DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, "
+ "setting target to %#x.\n",
+ squashed_sn, corr_target);
+
+ while (!predHist.empty() && predHist.front().seqNum > squashed_sn) {
+
+ if (predHist.front().usedRAS) {
+ DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, "
+ "target: %#x.\n",
+ predHist.front().RASIndex,
+ predHist.front().RASTarget);
+
+ RAS.restore(predHist.front().RASIndex,
+ predHist.front().RASTarget);
+ } else if (predHist.front().wasCall) {
+ DPRINTF(Fetch, "BranchPred: Removing speculative entry added "
+ "to the RAS.\n");
+
+ RAS.pop();
+ }
+
+ predHist.pop_front();
+ }
+
+ predHist.front().predTaken = actually_taken;
+
+ if (predHist.front().usedRAS) {
+ ++RASIncorrect;
+ }
+
+ BP.update(predHist.front().PC, actually_taken);
+
+ BTB.update(predHist.front().PC, corr_target);
+}
diff --git a/cpu/beta_cpu/btb.cc b/cpu/beta_cpu/btb.cc
index b49f30482..bceaa66d1 100644
--- a/cpu/beta_cpu/btb.cc
+++ b/cpu/beta_cpu/btb.cc
@@ -50,6 +50,8 @@ DefaultBTB::valid(const Addr &inst_PC)
Addr inst_tag = getTag(inst_PC);
+ assert(btb_idx < numEntries);
+
if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) {
return true;
} else {
@@ -67,6 +69,8 @@ DefaultBTB::lookup(const Addr &inst_PC)
Addr inst_tag = getTag(inst_PC);
+ assert(btb_idx < numEntries);
+
if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) {
return btb[btb_idx].target;
} else {
@@ -79,6 +83,8 @@ DefaultBTB::update(const Addr &inst_PC, const Addr &target)
{
unsigned btb_idx = getIndex(inst_PC);
+ assert(btb_idx < numEntries);
+
btb[btb_idx].valid = true;
btb[btb_idx].target = target;
btb[btb_idx].tag = getTag(inst_PC);
diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh
index 849a6c797..e327a83b9 100644
--- a/cpu/beta_cpu/comm.hh
+++ b/cpu/beta_cpu/comm.hh
@@ -9,6 +9,7 @@
using namespace std;
// Find better place to put this typedef.
+// The impl might be the best place for this.
typedef short int PhysRegIndex;
template<class Impl>
@@ -45,6 +46,14 @@ struct SimpleIEWSimpleCommit {
int size;
DynInstPtr insts[Impl::MaxWidth + 1];
+
+ bool squash;
+ bool branchMispredict;
+ bool branchTaken;
+ uint64_t mispredPC;
+ uint64_t nextPC;
+ unsigned globalHist;
+ InstSeqNum squashedSeqNum;
};
template<class Impl>
@@ -63,10 +72,15 @@ struct TimeBufStruct {
bool predIncorrect;
uint64_t branchAddr;
+ InstSeqNum doneSeqNum;
+
+ // Might want to package this kind of branch stuff into a single
+ // struct as it is used pretty frequently.
bool branchMispredict;
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
+ unsigned globalHist;
};
decodeComm decodeInfo;
@@ -84,17 +98,10 @@ struct TimeBufStruct {
renameComm renameInfo;
struct iewComm {
- bool squash;
bool stall;
// Also eventually include skid buffer space.
unsigned freeIQEntries;
-
- bool branchMispredict;
- bool branchTaken;
- uint64_t mispredPC;
- uint64_t nextPC;
- InstSeqNum squashedSeqNum;
};
iewComm iewInfo;
@@ -108,6 +115,7 @@ struct TimeBufStruct {
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
+ unsigned globalHist;
// Think of better names here.
// Will need to be a variety of sizes...
diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh
index 981d9e78f..f1a185143 100644
--- a/cpu/beta_cpu/commit.hh
+++ b/cpu/beta_cpu/commit.hh
@@ -59,6 +59,8 @@ class SimpleCommit
public:
SimpleCommit(Params &params);
+ void regStats();
+
void setCPU(FullCPU *cpu_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -142,6 +144,17 @@ class SimpleCommit
/** Commit width, in instructions. */
unsigned commitWidth;
+
+ Stats::Scalar<> commitCommittedInsts;
+ Stats::Scalar<> commitSquashedInsts;
+ Stats::Scalar<> commitSquashEvents;
+ Stats::Scalar<> commitNonSpecStalls;
+ Stats::Scalar<> commitCommittedBranches;
+ Stats::Scalar<> commitCommittedLoads;
+ Stats::Scalar<> commitCommittedMemRefs;
+ Stats::Scalar<> branchMispredicts;
+
+ Stats::Distribution<> n_committed_dist;
};
#endif // __SIMPLE_COMMIT_HH__
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh
index 45b8bc7de..9a69c9259 100644
--- a/cpu/beta_cpu/commit_impl.hh
+++ b/cpu/beta_cpu/commit_impl.hh
@@ -23,6 +23,51 @@ SimpleCommit<Impl>::SimpleCommit(Params &params)
template <class Impl>
void
+SimpleCommit<Impl>::regStats()
+{
+ commitCommittedInsts
+ .name(name() + ".commitCommittedInsts")
+ .desc("The number of committed instructions")
+ .prereq(commitCommittedInsts);
+ commitSquashedInsts
+ .name(name() + ".commitSquashedInsts")
+ .desc("The number of squashed insts skipped by commit")
+ .prereq(commitSquashedInsts);
+ commitSquashEvents
+ .name(name() + ".commitSquashEvents")
+ .desc("The number of times commit is told to squash")
+ .prereq(commitSquashEvents);
+ commitNonSpecStalls
+ .name(name() + ".commitNonSpecStalls")
+ .desc("The number of times commit has been forced to stall to "
+ "communicate backwards")
+ .prereq(commitNonSpecStalls);
+ commitCommittedBranches
+ .name(name() + ".commitCommittedBranches")
+ .desc("The number of committed branches")
+ .prereq(commitCommittedBranches);
+ commitCommittedLoads
+ .name(name() + ".commitCommittedLoads")
+ .desc("The number of committed loads")
+ .prereq(commitCommittedLoads);
+ commitCommittedMemRefs
+ .name(name() + ".commitCommittedMemRefs")
+ .desc("The number of committed memory references")
+ .prereq(commitCommittedMemRefs);
+ branchMispredicts
+ .name(name() + ".branchMispredicts")
+ .desc("The number of times a branch was mispredicted")
+ .prereq(branchMispredicts);
+ n_committed_dist
+ .init(0,commitWidth,1)
+ .name(name() + ".COM:committed_per_cycle")
+ .desc("Number of insts commited each cycle")
+ .flags(Stats::pdf)
+ ;
+}
+
+template <class Impl>
+void
SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr)
{
DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
@@ -143,12 +188,12 @@ SimpleCommit<Impl>::commit()
// Should I also check if the commit stage is telling the ROB to squah?
// This might be necessary to keep the same timing between the IQ and
// the ROB...
- if (robInfoFromIEW->iewInfo.squash) {
+ if (fromIEW->squash) {
DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n");
_status = ROBSquashing;
- InstSeqNum squashed_inst = robInfoFromIEW->iewInfo.squashedSeqNum;
+ InstSeqNum squashed_inst = fromIEW->squashedSeqNum;
rob->squash(squashed_inst);
@@ -162,15 +207,19 @@ SimpleCommit<Impl>::commit()
// ROB is in the process of squashing.
toIEW->commitInfo.robSquashing = true;
- toIEW->commitInfo.branchMispredict =
- robInfoFromIEW->iewInfo.branchMispredict;
+ toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict;
+
+ toIEW->commitInfo.branchTaken = fromIEW->branchTaken;
+
+ toIEW->commitInfo.nextPC = fromIEW->nextPC;
- toIEW->commitInfo.branchTaken =
- robInfoFromIEW->iewInfo.branchTaken;
+ toIEW->commitInfo.mispredPC = fromIEW->mispredPC;
- toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC;
+ toIEW->commitInfo.globalHist = fromIEW->globalHist;
- toIEW->commitInfo.mispredPC = robInfoFromIEW->iewInfo.mispredPC;
+ if (toIEW->commitInfo.branchMispredict) {
+ ++branchMispredicts;
+ }
}
if (_status != ROBSquashing) {
@@ -237,6 +286,8 @@ SimpleCommit<Impl>::commitInsts()
// inst in the ROB without affecting any other stages.
rob->retireHead();
+ ++commitSquashedInsts;
+
} else {
// Increment the total number of non-speculative instructions
// executed.
@@ -249,7 +300,7 @@ SimpleCommit<Impl>::commitInsts()
bool commit_success = commitHead(head_inst, num_committed);
// Update what instruction we are looking at if the commit worked.
- if(commit_success) {
+ if (commit_success) {
++num_committed;
// Send back which instruction has been committed.
@@ -258,7 +309,11 @@ SimpleCommit<Impl>::commitInsts()
// sequence number instead (copy).
toIEW->commitInfo.doneSeqNum = head_inst->seqNum;
- cpu->instDone();
+ ++commitCommittedInsts;
+
+ if (!head_inst->isNop()) {
+ cpu->instDone();
+ }
} else {
break;
}
@@ -267,6 +322,8 @@ SimpleCommit<Impl>::commitInsts()
// Update the pointer to read the next instruction in the ROB.
head_inst = rob->readHeadInst();
}
+
+ n_committed_dist.sample(num_committed);
}
template <class Impl>
@@ -276,18 +333,13 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Make sure instruction is valid
assert(head_inst);
- Fault fault = No_Fault;
-
- // If the head instruction is a store or a load, then execute it
- // because this simple model does no speculative memory access.
- // Hopefully this covers all memory references.
- // Also check if it's nonspeculative. Or a nop. Then it will be
- // executed only when it reaches the head of the ROB. Actually
- // executing a nop is a bit overkill...
+ // If the instruction is not executed yet, then it is a non-speculative
+ // or store inst. Signal backwards that it should be executed.
if (!head_inst->isExecuted()) {
// Keep this number correct. We have not yet actually executed
// and committed this instruction.
cpu->funcExeInst--;
+
if (head_inst->isStore() || head_inst->isNonSpeculative()) {
DPRINTF(Commit, "Commit: Encountered a store or non-speculative "
"instruction at the head of the ROB, PC %#x.\n",
@@ -299,6 +351,8 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// it is executed.
head_inst->clearCanCommit();
+ ++commitNonSpecStalls;
+
return false;
} else {
panic("Commit: Trying to commit un-executed instruction "
@@ -306,19 +360,6 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
}
}
- // Check if memory access was successful.
- if (fault != No_Fault) {
- // Handle data cache miss here. In the future, set the status
- // to data cache miss, then exit the stage. Have an event
- // that handles commiting the head instruction, then setting
- // the stage back to running, when the event is run. (just
- // make sure that event is commit's run for that cycle)
- panic("Commit: Load/store instruction failed, not sure what "
- "to do.\n");
- // Also will want to clear the instruction's fault after being
- // handled here so it's not handled again below.
- }
-
// Now check if it's one of the special trap or barrier or
// serializing instructions.
if (head_inst->isThreadSync() ||
@@ -335,39 +376,43 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Check if the instruction caused a fault. If so, trap.
if (head_inst->getFault() != No_Fault) {
+ if (!head_inst->isNop()) {
#ifdef FULL_SYSTEM
- cpu->trap(fault);
+ cpu->trap(fault);
#else // !FULL_SYSTEM
- if (!head_inst->isNop()) {
panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
head_inst->PC);
- }
#endif // FULL_SYSTEM
+ }
}
// Check if we're really ready to commit. If not then return false.
// I'm pretty sure all instructions should be able to commit if they've
// reached this far. For now leave this in as a check.
if(!rob->isHeadReady()) {
- DPRINTF(Commit, "Commit: Unable to commit head instruction!\n");
+ panic("Commit: Unable to commit head instruction!\n");
return false;
}
// If it's a branch, then send back branch prediction update info
// to the fetch stage.
// This should be handled in the iew stage if a mispredict happens...
-#if 0
+
if (head_inst->isControl()) {
+#if 0
toIEW->nextPC = head_inst->readPC();
//Maybe switch over to BTB incorrect.
toIEW->btbMissed = head_inst->btbMiss();
toIEW->target = head_inst->nextPC;
//Maybe also include global history information.
//This simple version will have no branch prediction however.
- }
#endif
+ ++commitCommittedBranches;
+ }
+
+
#if 0
// Check if the instruction has a destination register.
// If so add the previous physical register of its logical register's
@@ -383,8 +428,12 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// the LDSTQ will already have been told that a store has reached the head
// of the ROB. Consider including communication if it's a store as well
// to keep things orthagonal.
- if (head_inst->isLoad()) {
- toIEW->commitInfo.commitIsLoad = true;
+ if (head_inst->isMemRef()) {
+ ++commitCommittedMemRefs;
+ if (head_inst->isLoad()) {
+ toIEW->commitInfo.commitIsLoad = true;
+ ++commitCommittedLoads;
+ }
}
// Now that the instruction is going to be committed, finalize its
diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh
index ec8460b77..1479eb191 100644
--- a/cpu/beta_cpu/cpu_policy.hh
+++ b/cpu/beta_cpu/cpu_policy.hh
@@ -22,7 +22,7 @@
template<class Impl>
struct SimpleCPUPolicy
{
- typedef DefaultBPredUnit<Impl> BPredUnit;
+ typedef TwobitBPredUnit<Impl> BPredUnit;
typedef PhysRegFile<Impl> RegFile;
typedef SimpleFreeList FreeList;
typedef SimpleRenameMap RenameMap;
diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh
index be88a4b36..64e87290e 100644
--- a/cpu/beta_cpu/decode.hh
+++ b/cpu/beta_cpu/decode.hh
@@ -49,6 +49,8 @@ class SimpleDecode
public:
SimpleDecode(Params &params);
+ void regStats();
+
void setCPU(FullCPU *cpu_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -128,6 +130,15 @@ class SimpleDecode
* group of instructions, it can restart at the proper instruction.
*/
unsigned numInst;
+
+ Stats::Scalar<> decodeIdleCycles;
+ Stats::Scalar<> decodeBlockedCycles;
+ Stats::Scalar<> decodeUnblockCycles;
+ Stats::Scalar<> decodeSquashCycles;
+ Stats::Scalar<> decodeBranchMispred;
+ Stats::Scalar<> decodeControlMispred;
+ Stats::Scalar<> decodeDecodedInsts;
+ Stats::Scalar<> decodeSquashedInsts;
};
#endif // __SIMPLE_DECODE_HH__
diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh
index d0f46eaa5..8b20bf8bc 100644
--- a/cpu/beta_cpu/decode_impl.hh
+++ b/cpu/beta_cpu/decode_impl.hh
@@ -16,6 +16,45 @@ SimpleDecode<Impl>::SimpleDecode(Params &params)
_status = Idle;
}
+template <class Impl>
+void
+SimpleDecode<Impl>::regStats()
+{
+ decodeIdleCycles
+ .name(name() + ".decodeIdleCycles")
+ .desc("Number of cycles decode is idle")
+ .prereq(decodeIdleCycles);
+ decodeBlockedCycles
+ .name(name() + ".decodeBlockedCycles")
+ .desc("Number of cycles decode is blocked")
+ .prereq(decodeBlockedCycles);
+ decodeUnblockCycles
+ .name(name() + ".decodeUnblockCycles")
+ .desc("Number of cycles decode is unblocking")
+ .prereq(decodeUnblockCycles);
+ decodeSquashCycles
+ .name(name() + ".decodeSquashCycles")
+ .desc("Number of cycles decode is squashing")
+ .prereq(decodeSquashCycles);
+ decodeBranchMispred
+ .name(name() + ".decodeBranchMispred")
+ .desc("Number of times decode detected a branch misprediction")
+ .prereq(decodeBranchMispred);
+ decodeControlMispred
+ .name(name() + ".decodeControlMispred")
+ .desc("Number of times decode detected an instruction incorrectly"
+ " predicted as a control")
+ .prereq(decodeControlMispred);
+ decodeDecodedInsts
+ .name(name() + ".decodeDecodedInsts")
+ .desc("Number of instructions handled by decode")
+ .prereq(decodeDecodedInsts);
+ decodeSquashedInsts
+ .name(name() + ".decodeSquashedInsts")
+ .desc("Number of squashed instructions handled by decode")
+ .prereq(decodeSquashedInsts);
+}
+
template<class Impl>
void
SimpleDecode<Impl>::setCPU(FullCPU *cpu_ptr)
@@ -91,7 +130,7 @@ SimpleDecode<Impl>::unblock()
// If there's still information in the skid buffer, then
// continue to tell previous stages to stall. They will be
- // able to restart once the skid buffer is empty.
+ // able to restart once the skid buffer is empty.
if (!skidBuffer.empty()) {
toFetch->decodeInfo.stall = true;
} else {
@@ -110,9 +149,12 @@ SimpleDecode<Impl>::squash(DynInstPtr &inst)
"detected at decode.\n");
Addr new_PC = inst->nextPC;
+ toFetch->decodeInfo.branchMispredict = true;
+ toFetch->decodeInfo.doneSeqNum = inst->seqNum;
toFetch->decodeInfo.predIncorrect = true;
toFetch->decodeInfo.squash = true;
toFetch->decodeInfo.nextPC = new_PC;
+ toFetch->decodeInfo.branchTaken = true;
// Set status to squashing.
_status = Squashing;
@@ -164,6 +206,8 @@ SimpleDecode<Impl>::tick()
// buffer were used. Remove those instructions and handle
// the rest of unblocking.
if (_status == Unblocking) {
+ ++decodeUnblockCycles;
+
if (fromFetch->size > 0) {
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
@@ -173,6 +217,8 @@ SimpleDecode<Impl>::tick()
unblock();
}
} else if (_status == Blocked) {
+ ++decodeBlockedCycles;
+
if (fromFetch->size > 0) {
block();
}
@@ -197,6 +243,8 @@ SimpleDecode<Impl>::tick()
squash();
}
} else if (_status == Squashing) {
+ ++decodeSquashCycles;
+
if (!fromCommit->commitInfo.squash &&
!fromCommit->commitInfo.robSquashing) {
_status = Running;
@@ -228,17 +276,16 @@ SimpleDecode<Impl>::decode()
// Check fetch queue to see if instructions are available.
// If no available instructions, do nothing, unless this stage is
// currently unblocking.
- if (!fromFetch->insts[0] && _status != Unblocking) {
+ if (fromFetch->size == 0 && _status != Unblocking) {
DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
// Should I change the status to idle?
+ ++decodeIdleCycles;
return;
}
+ // Might be better to use a base DynInst * instead?
DynInstPtr inst;
- // Instead have a class member variable that records which instruction
- // was the last one that was ended on. At the tick() stage, it can
- // check if that's equal to 0. If not, then don't pop stuff off.
unsigned to_rename_index = 0;
int insts_available = _status == Unblocking ?
@@ -264,18 +311,10 @@ SimpleDecode<Impl>::decode()
}
#endif
- // Check to make sure that instructions coming from fetch are valid.
- // Normally at this stage the branch target of PC-relative branches
- // should be computed here. However in this simple model all
- // computation will take place at execute. Hence doneTargCalc()
- // will always be false.
while (insts_available > 0)
{
DPRINTF(Decode, "Decode: Sending instruction to rename.\n");
- // Might create some sort of accessor to get an instruction
- // on a per thread basis. Or might be faster to just get
- // a pointer to an array or list of instructions and use that
- // within this code.
+
inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
fromFetch->insts[numInst];
@@ -287,6 +326,8 @@ SimpleDecode<Impl>::decode()
"squashed, skipping.\n",
inst->seqNum, inst->readPC());
+ ++decodeSquashedInsts;
+
++numInst;
--insts_available;
@@ -305,16 +346,22 @@ SimpleDecode<Impl>::decode()
if (inst->predTaken() && !inst->isControl()) {
panic("Instruction predicted as a branch!");
+ ++decodeControlMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst);
break;
}
- // Ensure that the predicted branch target is the actual branch
- // target if possible (branches that are PC relative).
- if (inst->isControl() && inst->doneTargCalc()) {
+ // Go ahead and compute any PC-relative branches.
+
+ if (inst->isDirectCtrl() && inst->isUncondCtrl() &&
+ inst->numDestRegs() == 0 && inst->numSrcRegs() == 0) {
+ inst->execute();
+ inst->setExecuted();
+
if (inst->mispredicted()) {
+ ++decodeBranchMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst);
@@ -322,6 +369,11 @@ SimpleDecode<Impl>::decode()
}
}
+ // Normally can check if a direct branch has the right target
+ // addr (either the immediate, or the branch PC + 4) and redirect
+ // fetch if it's incorrect.
+
+
// Also check if instructions have no source registers. Mark
// them as ready to issue at any time. Not sure if this check
// should exist here or at a later stage; however it doesn't matter
@@ -334,6 +386,7 @@ SimpleDecode<Impl>::decode()
// Increment which instruction we're looking at.
++numInst;
++to_rename_index;
+ ++decodeDecodedInsts;
--insts_available;
}
diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh
index e59a9df7f..4cfc2f167 100644
--- a/cpu/beta_cpu/fetch.hh
+++ b/cpu/beta_cpu/fetch.hh
@@ -14,6 +14,7 @@
#include "sim/eventq.hh"
#include "cpu/pc_event.hh"
#include "mem/mem_interface.hh"
+#include "base/statistics.hh"
/**
* SimpleFetch class to fetch a single instruction each cycle. SimpleFetch
@@ -59,6 +60,8 @@ class SimpleFetch
/** SimpleFetch constructor. */
SimpleFetch(Params &params);
+ void regStats();
+
void setCPU(FullCPU *cpu_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
@@ -73,9 +76,13 @@ class SimpleFetch
// private:
// Figure out PC vs next PC and how it should be updated
- void squash(Addr newPC);
+ void squash(const Addr &new_PC);
private:
+ inline void doSquash(const Addr &new_PC);
+
+ void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
+
/**
* Looks up in the branch predictor to see if the next PC should be
* either next PC+=MachInst or a branch target.
@@ -84,7 +91,27 @@ class SimpleFetch
* the next PC will be.
* @return Whether or not a branch was predicted as taken.
*/
- bool lookupAndUpdateNextPC(Addr &next_PC);
+ bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC);
+
+ // Might not want this function...
+// inline void recordGlobalHist(DynInstPtr &inst);
+
+ /**
+ * Fetches the cache line that contains fetch_PC. Returns any
+ * fault that happened. Puts the data into the class variable
+ * cacheData.
+ * @params fetch_PC The PC address that is being fetched from.
+ * @return Any fault that occured.
+ */
+ Fault fetchCacheLine(Addr fetch_PC);
+
+ // Align an address (typically a PC) to the start of an I-cache block.
+ // We fold in the PISA 64- to 32-bit conversion here as well.
+ Addr icacheBlockAlignPC(Addr addr)
+ {
+ addr = ISA::realPCToFetchPC(addr);
+ return (addr & ~(cacheBlkMask));
+ }
public:
class CacheCompletionEvent : public Event
@@ -99,7 +126,7 @@ class SimpleFetch
virtual const char *description();
};
- CacheCompletionEvent cacheCompletionEvent;
+// CacheCompletionEvent cacheCompletionEvent;
private:
/** Pointer to the FullCPU. */
@@ -152,20 +179,32 @@ class SimpleFetch
unsigned fetchWidth;
/** Cache block size. */
- int blkSize;
+ int cacheBlkSize;
/** Mask to get a cache block's address. */
- Addr cacheBlockMask;
+ Addr cacheBlkMask;
/** The instruction being fetched. */
- MachInst inst;
+// MachInst inst;
+
+ /** The cache line being fetched. */
+ uint8_t *cacheData;
/** Size of instructions. */
int instSize;
/** Icache stall statistics. */
-// Stats::Scalar<> icacheStallCycles;
-// Counter lastIcacheStall;
+ Counter lastIcacheStall;
+
+ Stats::Scalar<> icacheStallCycles;
+ Stats::Scalar<> fetchedInsts;
+ Stats::Scalar<> predictedBranches;
+ Stats::Scalar<> fetchCycles;
+ Stats::Scalar<> fetchSquashCycles;
+ Stats::Scalar<> fetchBlockedCycles;
+ Stats::Scalar<> fetchedCacheLines;
+
+ Stats::Distribution<> fetch_nisn_dist;
};
#endif //__SIMPLE_FETCH_HH__
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
index 93f7bf6d2..8c9cf9f41 100644
--- a/cpu/beta_cpu/fetch_impl.hh
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -1,10 +1,8 @@
-// Todo: Add in branch prediction. With probe path, should
-// be able to specify
-// size of data to fetch. Will be able to get full cache line.
-
-// Remove this later.
+// Remove this later; used only for debugging.
#define OPCODE(X) (X >> 26) & 0x3f
+
+#include "arch/alpha/byte_swap.hh"
#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
@@ -37,15 +35,14 @@ SimpleFetch<Impl>::CacheCompletionEvent::description()
template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
- : cacheCompletionEvent(this),
+ : //cacheCompletionEvent(this),
icacheInterface(params.icacheInterface),
branchPred(params),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
iewToFetchDelay(params.iewToFetchDelay),
commitToFetchDelay(params.commitToFetchDelay),
- fetchWidth(params.fetchWidth),
- inst(0)
+ fetchWidth(params.fetchWidth)
{
// Set status to idle.
_status = Idle;
@@ -62,13 +59,63 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
memReq->data = new uint8_t[64];
// Size of cache block.
- blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+ cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
// Create mask to get rid of offset bits.
- cacheBlockMask = (blkSize - 1);
+ cacheBlkMask = (cacheBlkSize - 1);
// Get the size of an instruction.
instSize = sizeof(MachInst);
+
+ // Create space to store a cache line.
+ cacheData = new uint8_t[cacheBlkSize];
+}
+
+template <class Impl>
+void
+SimpleFetch<Impl>::regStats()
+{
+ icacheStallCycles
+ .name(name() + ".icacheStallCycles")
+ .desc("Number of cycles fetch is stalled on an Icache miss")
+ .prereq(icacheStallCycles);
+
+ fetchedInsts
+ .name(name() + ".fetchedInsts")
+ .desc("Number of instructions fetch has processed")
+ .prereq(fetchedInsts);
+ predictedBranches
+ .name(name() + ".predictedBranches")
+ .desc("Number of branches that fetch has predicted taken")
+ .prereq(predictedBranches);
+ fetchCycles
+ .name(name() + ".fetchCycles")
+ .desc("Number of cycles fetch has run and was not squashing or"
+ " blocked")
+ .prereq(fetchCycles);
+ fetchSquashCycles
+ .name(name() + ".fetchSquashCycles")
+ .desc("Number of cycles fetch has spent squashing")
+ .prereq(fetchSquashCycles);
+ fetchBlockedCycles
+ .name(name() + ".fetchBlockedCycles")
+ .desc("Number of cycles fetch has spent blocked")
+ .prereq(fetchBlockedCycles);
+ fetchedCacheLines
+ .name(name() + ".fetchedCacheLines")
+ .desc("Number of cache lines fetched")
+ .prereq(fetchedCacheLines);
+
+ fetch_nisn_dist
+ .init(/* base value */ 0,
+ /* last value */ fetchWidth,
+ /* bucket size */ 1)
+ .name(name() + ".FETCH:rate_dist")
+ .desc("Number of instructions fetched each cycle (Total)")
+ .flags(Stats::pdf)
+ ;
+
+ branchPred.regStats();
}
template<class Impl>
@@ -122,19 +169,40 @@ SimpleFetch<Impl>::processCacheCompletion()
_status = IcacheMissComplete;
}
-template<class Impl>
+#if 0
+template <class Impl>
+inline void
+SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst)
+{
+ inst->setGlobalHist(branchPred.BPReadGlobalHist());
+}
+#endif
+
+template <class Impl>
bool
-SimpleFetch<Impl>::lookupAndUpdateNextPC(Addr &next_PC)
+SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
{
-#if 1
// Do branch prediction check here.
- bool predict_taken = branchPred.BPLookup(next_PC);
- Addr predict_target;
+ // A bit of a misnomer...next_PC is actually the current PC until
+ // this function updates it.
+ bool predict_taken;
+
+ if (!inst->isControl()) {
+ next_PC = next_PC + instSize;
+ inst->setPredTarg(next_PC);
+ return false;
+ }
+
+ predict_taken = branchPred.predict(inst, next_PC);
+
+#if 0
+ predict_taken = branchPred.BPLookup(next_PC)
DPRINTF(Fetch, "Fetch: Branch predictor predicts taken? %i\n",
predict_taken);
- if (branchPred.BTBValid(next_PC)) {
+ // Only check the BTB if the BP has predicted taken.
+ if (predict_taken && branchPred.BTBValid(next_PC)) {
predict_target = branchPred.BTBLookup(next_PC);
DPRINTF(Fetch, "Fetch: BTB target is %#x.\n", predict_target);
} else {
@@ -142,42 +210,135 @@ SimpleFetch<Impl>::lookupAndUpdateNextPC(Addr &next_PC)
DPRINTF(Fetch, "Fetch: BTB does not have a valid entry.\n");
}
- // Now update the PC to fetch the next instruction in the cache
- // line.
- if (!predict_taken) {
- next_PC = next_PC + instSize;
- return false;
- } else {
- next_PC = predict_target;
- return true;
- }
#endif
+ if (predict_taken) {
+ ++predictedBranches;
+ }
-#if 0
- next_PC = next_PC + instSize;
- return false;
-#endif
+ return predict_taken;
}
-template<class Impl>
-void
-SimpleFetch<Impl>::squash(Addr new_PC)
+template <class Impl>
+Fault
+SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
+{
+ // Check if the instruction exists within the cache.
+ // If it does, then proceed on to read the instruction and the rest
+ // of the instructions in the cache line until either the end of the
+ // cache line or a predicted taken branch is encountered.
+
+#ifdef FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+
+ Fault fault = No_Fault;
+
+ // Align the fetch PC so it's at the start of a cache block.
+ fetch_PC = icacheBlockAlignPC(fetch_PC);
+
+ // Setup the memReq to do a read of the first isntruction's address.
+ // Set the appropriate read size and flags as well.
+ memReq->cmd = Read;
+ memReq->reset(fetch_PC, cacheBlkSize, flags);
+
+ // Translate the instruction request.
+ // Should this function be
+ // in the CPU class ? Probably...ITB/DTB should exist within the
+ // CPU.
+
+ fault = cpu->translateInstReq(memReq);
+
+ // In the case of faults, the fetch stage may need to stall and wait
+ // on what caused the fetch (ITB or Icache miss).
+
+ // If translation was successful, attempt to read the first
+ // instruction.
+ if (fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
+ fault = cpu->mem->read(memReq, cacheData);
+ // This read may change when the mem interface changes.
+
+ fetchedCacheLines++;
+ }
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (icacheInterface && fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
+ memReq->completionEvent = NULL;
+
+ memReq->time = curTick;
+
+ MemAccessResult result = icacheInterface->access(memReq);
+
+ // If the cache missed (in this model functional and timing
+ // memories are different), then schedule an event to wake
+ // up this stage once the cache miss completes.
+ if (result != MA_HIT && icacheInterface->doEvents()) {
+ memReq->completionEvent = new CacheCompletionEvent(this);
+// lastIcacheStall = curTick;
+
+ // How does current model work as far as individual
+ // stages scheduling/unscheduling?
+ // Perhaps have only the main CPU scheduled/unscheduled,
+ // and have it choose what stages to run appropriately.
+
+ DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
+ _status = IcacheMissStall;
+ }
+ }
+
+ return fault;
+}
+
+template <class Impl>
+inline void
+SimpleFetch<Impl>::doSquash(const Addr &new_PC)
{
DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
cpu->setNextPC(new_PC + instSize);
cpu->setPC(new_PC);
- _status = Squashing;
-
// Clear the icache miss if it's outstanding.
if (_status == IcacheMissStall && icacheInterface) {
+ DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
// @todo: Use an actual thread number here.
icacheInterface->squash(0);
}
- // Tell the CPU to remove any instructions that aren't currently
- // in the ROB (instructions in flight that were killed).
+ _status = Squashing;
+
+ ++fetchSquashCycles;
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
+ const InstSeqNum &seq_num)
+{
+ DPRINTF(Fetch, "Fetch: Squashing from decode.\n");
+
+ doSquash(new_PC);
+
+ // Tell the CPU to remove any instructions that are in flight between
+ // fetch and decode.
+ cpu->removeInstsUntil(seq_num);
+
+}
+
+template <class Impl>
+void
+SimpleFetch<Impl>::squash(const Addr &new_PC)
+{
+ DPRINTF(Fetch, "Fetch: Squash from commit.\n");
+
+ doSquash(new_PC);
+
+ // Tell the CPU to remove any instructions that are not in the ROB.
cpu->removeInstsNotInROB();
}
@@ -185,7 +346,6 @@ template<class Impl>
void
SimpleFetch<Impl>::tick()
{
-#if 1
// Check squash signals from commit.
if (fromCommit->commitInfo.squash) {
DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
@@ -196,13 +356,18 @@ SimpleFetch<Impl>::tick()
// Also check if there's a mispredict that happened.
if (fromCommit->commitInfo.branchMispredict) {
- branchPred.BPUpdate(fromCommit->commitInfo.mispredPC,
- fromCommit->commitInfo.branchTaken);
- branchPred.BTBUpdate(fromCommit->commitInfo.mispredPC,
- fromCommit->commitInfo.nextPC);
+ branchPred.squash(fromCommit->commitInfo.doneSeqNum,
+ fromCommit->commitInfo.nextPC,
+ fromCommit->commitInfo.branchTaken);
+ } else {
+ branchPred.squash(fromCommit->commitInfo.doneSeqNum);
}
return;
+ } else if (fromCommit->commitInfo.doneSeqNum) {
+ // Update the branch predictor if it wasn't a squashed instruction
+ // that was braodcasted.
+ branchPred.update(fromCommit->commitInfo.doneSeqNum);
}
// Check ROB squash signals from commit.
@@ -211,6 +376,8 @@ SimpleFetch<Impl>::tick()
// Continue to squash.
_status = Squashing;
+
+ ++fetchSquashCycles;
return;
}
@@ -220,22 +387,22 @@ SimpleFetch<Impl>::tick()
"from decode.\n");
// Update the branch predictor.
- if (fromCommit->decodeInfo.branchMispredict) {
- branchPred.BPUpdate(fromDecode->decodeInfo.mispredPC,
- fromDecode->decodeInfo.branchTaken);
- branchPred.BTBUpdate(fromDecode->decodeInfo.mispredPC,
- fromDecode->decodeInfo.nextPC);
+ if (fromDecode->decodeInfo.branchMispredict) {
+ branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
+ fromDecode->decodeInfo.nextPC,
+ fromDecode->decodeInfo.branchTaken);
+ } else {
+ branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
}
if (_status != Squashing) {
// Squash unless we're already squashing?
- squash(fromDecode->decodeInfo.nextPC);
+ squashFromDecode(fromDecode->decodeInfo.nextPC,
+ fromDecode->decodeInfo.doneSeqNum);
return;
}
}
-
-
// Check if any of the stall signals are high.
if (fromDecode->decodeInfo.stall ||
fromRename->renameInfo.stall ||
@@ -253,12 +420,15 @@ SimpleFetch<Impl>::tick()
fromCommit->commitInfo.stall);
_status = Blocked;
+
+ ++fetchBlockedCycles;
return;
} else if (_status == Blocked) {
// Unblock stage if status is currently blocked and none of the
// stall signals are being held high.
_status = Running;
+ ++fetchBlockedCycles;
return;
}
@@ -273,74 +443,15 @@ SimpleFetch<Impl>::tick()
// Switch status to running
_status = Running;
+
+ ++fetchSquashCycles;
} else if (_status != IcacheMissStall) {
DPRINTF(Fetch, "Fetch: Running stage.\n");
- fetch();
- }
-#endif
-
-#if 0
- if (_status != Blocked &&
- _status != Squashing &&
- _status != IcacheMissStall) {
- DPRINTF(Fetch, "Fetch: Running stage.\n");
+ ++fetchCycles;
fetch();
- } else if (_status == Blocked) {
- // If still being told to stall, do nothing.
- if (fromDecode->decodeInfo.stall ||
- fromRename->renameInfo.stall ||
- fromIEW->iewInfo.stall ||
- fromCommit->commitInfo.stall)
- {
- DPRINTF(Fetch, "Fetch: Stalling stage.\n");
- DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
- "Commit: %i\n",
- fromDecode->decodeInfo.stall,
- fromRename->renameInfo.stall,
- fromIEW->iewInfo.stall,
- fromCommit->commitInfo.stall);
- } else {
-
- DPRINTF(Fetch, "Fetch: Done blocking.\n");
- _status = Running;
- }
-
- if (fromCommit->commitInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from commit.\n");
- squash(fromCommit->commitInfo.nextPC);
- return;
- } else if (fromDecode->decodeInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from decode.\n");
- squash(fromDecode->decodeInfo.nextPC);
- return;
- } else if (fromCommit->commitInfo.robSquashing) {
- DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
- _status = Squashing;
- return;
- }
- } else if (_status == Squashing) {
- // If there are no squash signals then change back to running.
- // Note that when a squash starts happening, commitInfo.squash will
- // be high. But if the squash is still in progress, then only
- // commitInfo.robSquashing will be high.
- if (!fromCommit->commitInfo.squash &&
- !fromCommit->commitInfo.robSquashing) {
-
- DPRINTF(Fetch, "Fetch: Done squashing.\n");
- _status = Running;
- } else if (fromCommit->commitInfo.squash) {
- // If there's a new squash, then start squashing again.
- squash(fromCommit->commitInfo.nextPC);
- } else {
- // Purely a debugging statement.
- DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
- }
}
-#endif
}
template<class Impl>
@@ -351,13 +462,6 @@ SimpleFetch<Impl>::fetch()
// Start actual fetch
//////////////////////////////////////////
-#ifdef FULL_SYSTEM
- // Flag to say whether or not address is physical addr.
- unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
-#else
- unsigned flags = 0;
-#endif // FULL_SYSTEM
-
// The current PC.
Addr fetch_PC = cpu->readPC();
@@ -379,64 +483,14 @@ SimpleFetch<Impl>::fetch()
"instruction, starting at PC %08p.\n",
fetch_PC);
- // Otherwise check if the instruction exists within the cache.
- // If it does, then proceed on to read the instruction and the rest
- // of the instructions in the cache line until either the end of the
- // cache line or a predicted taken branch is encountered.
- // Note that this simply checks if the first instruction exists
- // within the cache, assuming the rest of the cache line also exists
- // within the cache.
-
- // Setup the memReq to do a read of the first isntruction's address.
- // Set the appropriate read size and flags as well.
- memReq->cmd = Read;
- memReq->reset(fetch_PC, instSize, flags);
-
- // Translate the instruction request.
- // Should this function be
- // in the CPU class ? Probably...ITB/DTB should exist within the
- // CPU.
-
- fault = cpu->translateInstReq(memReq);
-
- // In the case of faults, the fetch stage may need to stall and wait
- // on what caused the fetch (ITB or Icache miss).
-
- // If translation was successful, attempt to read the first
- // instruction.
- if (fault == No_Fault) {
- DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
- fault = cpu->mem->read(memReq, inst);
- // This read may change when the mem interface changes.
- }
-
- // Now do the timing access to see whether or not the instruction
- // exists within the cache.
- if (icacheInterface && fault == No_Fault) {
- DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
- memReq->completionEvent = NULL;
-
- memReq->time = curTick;
-
- MemAccessResult result = icacheInterface->access(memReq);
-
- // If the cache missed (in this model functional and timing
- // memories are different), then schedule an event to wake
- // up this stage once the cache miss completes.
- if (result != MA_HIT && icacheInterface->doEvents()) {
- memReq->completionEvent = &cacheCompletionEvent;
-// lastIcacheStall = curTick;
-
- // How does current model work as far as individual
- // stages scheduling/unscheduling?
- // Perhaps have only the main CPU scheduled/unscheduled,
- // and have it choose what stages to run appropriately.
+ fault = fetchCacheLine(fetch_PC);
+ }
- DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
- _status = IcacheMissStall;
- return;
- }
- }
+ // If we had a stall due to an icache miss, then return. It'd
+ // be nicer if this were handled through the kind of fault that
+ // is returned by the function.
+ if (_status == IcacheMissStall) {
+ return;
}
// As far as timing goes, the CPU will need to send an event through
@@ -446,11 +500,15 @@ SimpleFetch<Impl>::fetch()
Addr next_PC = fetch_PC;
InstSeqNum inst_seq;
+ MachInst inst;
+ unsigned offset = fetch_PC & cacheBlkMask;
+ unsigned fetched;
- // If the read of the first instruction was successful, then grab the
- // instructions from the rest of the cache line and put them into the
- // queue heading to decode.
if (fault == No_Fault) {
+ // If the read of the first instruction was successful, then grab the
+ // instructions from the rest of the cache line and put them into the
+ // queue heading to decode.
+
DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
//////////////////////////
@@ -461,124 +519,59 @@ SimpleFetch<Impl>::fetch()
// ended this fetch block.
bool predicted_branch = false;
- // Might want to keep track of various stats.
-// numLinesFetched++;
-
- // Get a sequence number.
- inst_seq = cpu->getAndIncrementInstSeq();
-
- // Update the next PC; it either is PC+sizeof(MachInst), or
- // branch_target. Check whether or not a branch was taken.
- predicted_branch = lookupAndUpdateNextPC(next_PC);
-
- // Because the first instruction was already fetched, create the
- // DynInst and put it into the queue to decode.
- DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
- inst_seq, cpu);
-
- DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
- inst_seq, instruction->readPC());
- DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
- OPCODE(inst));
-
- instruction->traceData =
- Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
- instruction->staticInst,
- instruction->readPC(), 0);
-
- cpu->addInst(instruction);
-
- // Write the instruction to the first slot in the queue
- // that heads to decode.
- toDecode->insts[0] = instruction;
-
- toDecode->size++;
-
- fetch_PC = next_PC;
-
- //////////////////////////
- // Fetch other instructions
- //////////////////////////
-
- // Obtain the index into the cache line by getting only the low
- // order bits. Will need to do shifting as well.
- int line_index = fetch_PC & cacheBlockMask;
-
- // Take instructions and put them into the queue heading to decode.
- // Then read the next instruction in the cache line. Continue
- // until either all of the fetch bandwidth is used (not an issue for
- // non-SMT), or the end of the cache line is reached. Note that
- // this assumes standard cachelines, and not something like a trace
- // cache where lines might not end at cache-line size aligned
- // addresses.
- // @todo: Fix the horrible amount of translates/reads that must
- // take place due to reading an entire cacheline. Ideally it
- // should all take place at once, return an array of binary
- // instructions, which can then be used to get all the instructions
- // needed. Figure out if I can roll it back into one loop.
- for (int fetched = 1;
- line_index < blkSize &&
+ for (fetched = 0;
+ offset < cacheBlkSize &&
fetched < fetchWidth &&
!predicted_branch;
- line_index+=instSize, ++fetched)
+ ++fetched)
{
- // Reset the mem request to setup the read of the next
- // instruction.
- memReq->reset(fetch_PC, instSize, flags);
- // Translate the instruction request.
- fault = cpu->translateInstReq(memReq);
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
- // Read instruction.
- if (fault == No_Fault) {
- fault = cpu->mem->read(memReq, inst);
- }
+ // Make sure this is a valid index.
+ assert(offset <= cacheBlkSize - instSize);
- // Check if there was a fault.
- if (fault != No_Fault) {
- panic("Fetch: Read of instruction faulted when it should "
- "succeed; most likely exceeding cache line.\n");
- }
+ // Get the instruction from the array of the cache line.
+ inst = htoa(*reinterpret_cast<MachInst *>
+ (&cacheData[offset]));
- // Get a sequence number.
- inst_seq = cpu->getAndIncrementInstSeq();
+ // Create a new DynInst from the instruction fetched.
+ DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
+ inst_seq, cpu);
- predicted_branch = lookupAndUpdateNextPC(next_PC);
+ DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
+ inst_seq, instruction->readPC());
- // Create the actual DynInst. Parameters are:
- // DynInst(instruction, PC, predicted PC, CPU pointer).
- // Because this simple model has no branch prediction, the
- // predicted PC will simply be PC+sizeof(MachInst).
- // Update to actually use a branch predictor to predict the
- // target in the future.
- DynInstPtr instruction =
- new DynInst(inst, fetch_PC, next_PC, inst_seq, cpu);
+ DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
+ OPCODE(inst));
instruction->traceData =
Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
instruction->staticInst,
instruction->readPC(), 0);
- DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
- inst_seq, instruction->readPC());
- DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
- OPCODE(inst));
+ predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
+ // Add instruction to the CPU's list of instructions.
cpu->addInst(instruction);
- // Write the instruction to the proper slot in the queue
+ // Write the instruction to the first slot in the queue
// that heads to decode.
toDecode->insts[fetched] = instruction;
toDecode->size++;
- // Might want to keep track of various stats.
-// numInstsFetched++;
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
- // Update the PC with the next PC.
+ // Move to the next instruction, unless we have a branch.
fetch_PC = next_PC;
+
+ offset+= instSize;
}
+ fetch_nisn_dist.sample(fetched);
}
// Now that fetching is completed, update the PC to signify what the next
@@ -592,6 +585,12 @@ SimpleFetch<Impl>::fetch()
cpu->setPC(next_PC);
cpu->setNextPC(next_PC + instSize);
} else {
+ // If the issue was an icache miss, then we can just return and
+ // wait until it is handled.
+ if (_status == IcacheMissStall) {
+ return;
+ }
+
// Handle the fault.
// This stage will not be able to continue until all the ROB
// slots are empty, at which point the fault can be handled.
diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh
index 0d2b2c421..e8e75f7ec 100644
--- a/cpu/beta_cpu/free_list.hh
+++ b/cpu/beta_cpu/free_list.hh
@@ -6,11 +6,9 @@
#include "arch/alpha/isa_traits.hh"
#include "cpu/beta_cpu/comm.hh"
+#include "base/traceflags.hh"
#include "base/trace.hh"
-// Question: Do I even need the number of logical registers?
-// How to avoid freeing registers instantly? Same with ROB entries.
-
/**
* FreeList class that simply holds the list of free integer and floating
* point registers. Can request for a free register of either type, and
@@ -153,8 +151,6 @@ SimpleFreeList::addIntReg(PhysRegIndex freed_reg)
assert(!freeIntRegsScoreboard[freed_reg]);
freeIntRegsScoreboard[freed_reg] = 1;
- //Might want to add in a check for whether or not this register is
- //already in there. A bit vector or something similar would be useful.
freeIntRegs.push(freed_reg);
}
@@ -167,8 +163,6 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
assert(!freeFloatRegsScoreboard[freed_reg]);
freeFloatRegsScoreboard[freed_reg] = 1;
- //Might want to add in a check for whether or not this register is
- //already in there. A bit vector or something similar would be useful.
freeFloatRegs.push(freed_reg);
}
diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc
index abeb4cb87..d5228601c 100644
--- a/cpu/beta_cpu/full_cpu.cc
+++ b/cpu/beta_cpu/full_cpu.cc
@@ -168,6 +168,13 @@ FullBetaCPU<Impl>::~FullBetaCPU()
template <class Impl>
void
+FullBetaCPU<Impl>::fullCPURegStats()
+{
+ // Register any of the FullCPU's stats here.
+}
+
+template <class Impl>
+void
FullBetaCPU<Impl>::tick()
{
DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullBetaCPU.\n");
@@ -424,19 +431,17 @@ template <class Impl>
void
FullBetaCPU<Impl>::removeFrontInst(DynInstPtr &inst)
{
- DynInstPtr inst_to_delete;
+ DynInstPtr inst_to_remove;
- // The front instruction should be the same one being asked to be deleted.
+ // The front instruction should be the same one being asked to be removed.
assert(instList.front() == inst);
// Remove the front instruction.
- inst_to_delete = inst;
+ inst_to_remove = inst;
instList.pop_front();
- DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n",
- inst_to_delete, inst_to_delete->readPC());
-
-// delete inst_to_delete;
+ DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n",
+ inst_to_remove, inst_to_remove->readPC());
}
template <class Impl>
@@ -453,6 +458,33 @@ FullBetaCPU<Impl>::removeInstsNotInROB()
template <class Impl>
void
+FullBetaCPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num)
+{
+ DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
+ "list.\n");
+
+ DynInstPtr inst_to_delete;
+
+ while (instList.back()->seqNum > seq_num) {
+ assert(!instList.empty());
+
+ // Obtain the pointer to the instruction.
+ inst_to_delete = instList.back();
+
+ DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n",
+ inst_to_delete->seqNum, inst_to_delete->readPC());
+
+ // Remove the instruction from the list.
+ instList.pop_back();
+
+ // Mark it as squashed.
+ inst_to_delete->setSquashed();
+ }
+
+}
+
+template <class Impl>
+void
FullBetaCPU<Impl>::removeAllInsts()
{
instList.clear();
diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh
index cf753ad67..bde7e5bbf 100644
--- a/cpu/beta_cpu/full_cpu.hh
+++ b/cpu/beta_cpu/full_cpu.hh
@@ -115,6 +115,8 @@ class FullBetaCPU : public BaseFullCPU
void init();
+ void fullCPURegStats();
+
void activateContext(int thread_num, int delay);
void suspendContext(int thread_num);
void deallocateContext(int thread_num);
@@ -205,6 +207,9 @@ class FullBetaCPU : public BaseFullCPU
/** Remove all instructions that are not currently in the ROB. */
void removeInstsNotInROB();
+ /** Remove all instructions younger than the given sequence number. */
+ void removeInstsUntil(const InstSeqNum &seq_num);
+
/** Remove all instructions from the list. */
void removeAllInsts();
diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh
index de408ef0c..90bd39e7f 100644
--- a/cpu/beta_cpu/iew.hh
+++ b/cpu/beta_cpu/iew.hh
@@ -9,6 +9,7 @@
#include "base/timebuf.hh"
#include "cpu/beta_cpu/comm.hh"
+#include "base/statistics.hh"
//Can IEW even stall? Space should be available/allocated already...maybe
//if there's not enough write ports on the ROB or waiting for CDB
@@ -50,7 +51,9 @@ class SimpleIEW
public:
void squash();
- void squash(DynInstPtr &inst);
+ void squashDueToBranch(DynInstPtr &inst);
+
+ void squashDueToMem(DynInstPtr &inst);
void block();
@@ -59,6 +62,8 @@ class SimpleIEW
public:
SimpleIEW(Params &params);
+ void regStats();
+
void setCPU(FullCPU *cpu_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -76,6 +81,10 @@ class SimpleIEW
void iew();
private:
+ void dispatchInsts();
+
+ void executeInsts();
+
//Interfaces to objects inside and outside of IEW.
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
@@ -159,9 +168,23 @@ class SimpleIEW
*/
unsigned cyclesSquashing;
- //Will implement later
- //Load queue interface (probably one and the same)
- //Store queue interface
+ Stats::Scalar<> iewIdleCycles;
+ Stats::Scalar<> iewSquashCycles;
+ Stats::Scalar<> iewBlockCycles;
+ Stats::Scalar<> iewUnblockCycles;
+// Stats::Scalar<> iewWBInsts;
+ Stats::Scalar<> iewDispatchedInsts;
+ Stats::Scalar<> iewDispSquashedInsts;
+ Stats::Scalar<> iewDispLoadInsts;
+ Stats::Scalar<> iewDispStoreInsts;
+ Stats::Scalar<> iewDispNonSpecInsts;
+ Stats::Scalar<> iewIQFullEvents;
+ Stats::Scalar<> iewExecutedInsts;
+ Stats::Scalar<> iewExecLoadInsts;
+ Stats::Scalar<> iewExecStoreInsts;
+ Stats::Scalar<> iewExecSquashedInsts;
+ Stats::Scalar<> memOrderViolationEvents;
+ Stats::Scalar<> predictedTakenIncorrect;
};
#endif
diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh
index 521ce77f6..2bfd6bae9 100644
--- a/cpu/beta_cpu/iew_impl.hh
+++ b/cpu/beta_cpu/iew_impl.hh
@@ -38,6 +38,79 @@ SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
instQueue.setIssueToExecuteQueue(&issueToExecQueue);
}
+template <class Impl, class IQ>
+void
+SimpleIEW<Impl, IQ>::regStats()
+{
+ instQueue.regStats();
+
+ iewIdleCycles
+ .name(name() + ".iewIdleCycles")
+ .desc("Number of cycles IEW is idle");
+
+ iewSquashCycles
+ .name(name() + ".iewSquashCycles")
+ .desc("Number of cycles IEW is squashing");
+
+ iewBlockCycles
+ .name(name() + ".iewBlockCycles")
+ .desc("Number of cycles IEW is blocking");
+
+ iewUnblockCycles
+ .name(name() + ".iewUnblockCycles")
+ .desc("Number of cycles IEW is unblocking");
+
+// iewWBInsts;
+
+ iewDispatchedInsts
+ .name(name() + ".iewDispatchedInsts")
+ .desc("Number of instructions dispatched to IQ");
+
+ iewDispSquashedInsts
+ .name(name() + ".iewDispSquashedInsts")
+ .desc("Number of squashed instructions skipped by dispatch");
+
+ iewDispLoadInsts
+ .name(name() + ".iewDispLoadInsts")
+ .desc("Number of dispatched load instructions");
+
+ iewDispStoreInsts
+ .name(name() + ".iewDispStoreInsts")
+ .desc("Number of dispatched store instructions");
+
+ iewDispNonSpecInsts
+ .name(name() + ".iewDispNonSpecInsts")
+ .desc("Number of dispatched non-speculative instructions");
+
+ iewIQFullEvents
+ .name(name() + ".iewIQFullEvents")
+ .desc("Number of times the IQ has become full, causing a stall");
+
+ iewExecutedInsts
+ .name(name() + ".iewExecutedInsts")
+ .desc("Number of executed instructions");
+
+ iewExecLoadInsts
+ .name(name() + ".iewExecLoadInsts")
+ .desc("Number of load instructions executed");
+
+ iewExecStoreInsts
+ .name(name() + ".iewExecStoreInsts")
+ .desc("Number of store instructions executed");
+
+ iewExecSquashedInsts
+ .name(name() + ".iewExecSquashedInsts")
+ .desc("Number of squashed instructions skipped in execute");
+
+ memOrderViolationEvents
+ .name(name() + ".memOrderViolationEvents")
+ .desc("Number of memory order violations");
+
+ predictedTakenIncorrect
+ .name(name() + ".predictedTakenIncorrect")
+ .desc("Number of branches that were predicted taken incorrectly");
+}
+
template<class Impl, class IQ>
void
SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
@@ -158,7 +231,7 @@ SimpleIEW<Impl, IQ>::squash()
template<class Impl, class IQ>
void
-SimpleIEW<Impl, IQ>::squash(DynInstPtr &inst)
+SimpleIEW<Impl, IQ>::squashDueToBranch(DynInstPtr &inst)
{
DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
inst->PC);
@@ -167,119 +240,39 @@ SimpleIEW<Impl, IQ>::squash(DynInstPtr &inst)
_status = Squashing;
// Tell rename to squash through the time buffer.
- toRename->iewInfo.squash = true;
+ toCommit->squash = true;
// Also send PC update information back to prior stages.
- toRename->iewInfo.squashedSeqNum = inst->seqNum;
- toRename->iewInfo.mispredPC = inst->readPC();
- toRename->iewInfo.nextPC = inst->readCalcTarg();
- toRename->iewInfo.branchMispredict = true;
+ toCommit->squashedSeqNum = inst->seqNum;
+ toCommit->mispredPC = inst->readPC();
+ toCommit->nextPC = inst->readCalcTarg();
+ toCommit->branchMispredict = true;
// Prediction was incorrect, so send back inverse.
- toRename->iewInfo.branchTaken = !(inst->predTaken());
+ toCommit->branchTaken = inst->readCalcTarg() !=
+ (inst->readPC() + sizeof(MachInst));
+// toCommit->globalHist = inst->readGlobalHist();
}
template<class Impl, class IQ>
void
-SimpleIEW<Impl, IQ>::tick()
+SimpleIEW<Impl, IQ>::squashDueToMem(DynInstPtr &inst)
{
- // Considering putting all the state-determining stuff in this section.
-
- // Try to fill up issue queue with as many instructions as bandwidth
- // allows.
- // Decode should try to execute as many instructions as its bandwidth
- // will allow, as long as it is not currently blocked.
-
- // Check if the stage is in a running status.
- if (_status != Blocked && _status != Squashing) {
- DPRINTF(IEW, "IEW: Status is not blocked, attempting to run "
- "stage.\n");
- iew();
-
- // If it's currently unblocking, check to see if it should switch
- // to running.
- if (_status == Unblocking) {
- unblock();
- }
- } else if (_status == Squashing) {
-
- DPRINTF(IEW, "IEW: Still squashing.\n");
-
- // Check if stage should remain squashing. Stop squashing if the
- // squash signal clears.
- if (!fromCommit->commitInfo.squash &&
- !fromCommit->commitInfo.robSquashing) {
- DPRINTF(IEW, "IEW: Done squashing, changing status to "
- "running.\n");
-
- _status = Running;
- instQueue.stopSquash();
- } else {
- instQueue.doSquash();
- }
-
- // Also should advance its own time buffers if the stage ran.
- // Not sure about this...
-// issueToExecQueue.advance();
- } else if (_status == Blocked) {
- // Continue to tell previous stage to stall.
- toRename->iewInfo.stall = true;
-
- // Check if possible stall conditions have cleared.
- if (!fromCommit->commitInfo.stall &&
- !instQueue.isFull()) {
- DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n");
- _status = Unblocking;
- }
-
- // If there's still instructions coming from rename, continue to
- // put them on the skid buffer.
- if (fromRename->insts[0]) {
- block();
- }
-
- if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- squash();
- }
- }
-
- // @todo: Maybe put these at the beginning, so if it's idle it can
- // return early.
- // Write back number of free IQ entries here.
- toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();
-
- // Check the committed load/store signals to see if there's a load
- // or store to commit. Also check if it's being told to execute a
- // nonspeculative instruction.
- if (fromCommit->commitInfo.commitIsStore) {
- ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
- } else if (fromCommit->commitInfo.commitIsLoad) {
- ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
- }
-
- if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
- instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
- }
+ DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
+ inst->PC);
+ // Perhaps leave the squashing up to the ROB stage to tell it when to
+ // squash?
+ _status = Squashing;
- DPRINTF(IEW, "IEW: IQ has %i free entries.\n",
- instQueue.numFreeEntries());
+ // Tell rename to squash through the time buffer.
+ toCommit->squash = true;
+ // Also send PC update information back to prior stages.
+ toCommit->squashedSeqNum = inst->seqNum;
+ toCommit->nextPC = inst->readCalcTarg();
}
-template<class Impl, class IQ>
+template <class Impl, class IQ>
void
-SimpleIEW<Impl, IQ>::iew()
+SimpleIEW<Impl, IQ>::dispatchInsts()
{
- // Might want to put all state checks in the tick() function.
- // Check if being told to stall from commit.
- if (fromCommit->commitInfo.stall) {
- block();
- return;
- } else if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- // Also check if commit is telling this stage to squash.
- squash();
- return;
- }
-
////////////////////////////////////////
// DISPATCH/ISSUE stage
////////////////////////////////////////
@@ -289,29 +282,36 @@ SimpleIEW<Impl, IQ>::iew()
// Check if there are any instructions coming from rename, and we're.
// not squashing.
- if (fromRename->insts[0] && _status != Squashing) {
+ if (fromRename->size > 0) {
+ int insts_to_add = fromRename->size;
// Loop through the instructions, putting them in the instruction
// queue.
- for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num)
+ for (int inst_num = 0; inst_num < insts_to_add; ++inst_num)
{
DynInstPtr inst = fromRename->insts[inst_num];
// Make sure there's a valid instruction there.
- if (!inst)
- break;
+ assert(inst);
DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n",
inst->readPC());
- // If it's a memory reference, don't put it in the
- // instruction queue. These will only be executed at commit.
- // Do the same for nonspeculative instructions and nops.
// Be sure to mark these instructions as ready so that the
// commit stage can go ahead and execute them, and mark
// them as issued so the IQ doesn't reprocess them.
if (inst->isSquashed()) {
+ ++iewDispSquashedInsts;
continue;
+ } else if (instQueue.isFull()) {
+ DPRINTF(IEW, "IEW: Issue: IQ has become full.\n");
+ // Call function to start blocking.
+ block();
+ // Tell previous stage to stall.
+ toRename->iewInfo.stall = true;
+
+ ++iewIQFullEvents;
+ break;
} else if (inst->isLoad()) {
DPRINTF(IEW, "IEW: Issue: Memory instruction "
"encountered, adding to LDSTQ.\n");
@@ -320,6 +320,7 @@ SimpleIEW<Impl, IQ>::iew()
// memory access.
ldstQueue.insertLoad(inst);
+ ++iewDispLoadInsts;
} else if (inst->isStore()) {
ldstQueue.insertStore(inst);
@@ -327,10 +328,15 @@ SimpleIEW<Impl, IQ>::iew()
// the commit stage will try committing it, and then
// once commit realizes it's a store it will send back
// a signal to this stage to issue and execute that
- // store.
+ // store. Change to be a bit that says the instruction
+ // has extra work to do at commit.
inst->setCanCommit();
instQueue.insertNonSpec(inst);
+
+ ++iewDispStoreInsts;
+ ++iewDispNonSpecInsts;
+
continue;
} else if (inst->isNonSpeculative()) {
DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
@@ -342,6 +348,8 @@ SimpleIEW<Impl, IQ>::iew()
// Specificall insert it as nonspeculative.
instQueue.insertNonSpec(inst);
+ ++iewDispNonSpecInsts;
+
continue;
} else if (inst->isNop()) {
DPRINTF(IEW, "IEW: Issue: Nop instruction encountered "
@@ -352,25 +360,35 @@ SimpleIEW<Impl, IQ>::iew()
inst->setCanCommit();
instQueue.advanceTail(inst);
+
+ continue;
+ } else if (inst->isExecuted()) {
+ DPRINTF(IEW, "IEW: Issue: Executed branch encountered, "
+ "skipping.\n");
+
+ assert(inst->isDirectCtrl());
+
+ inst->setIssued();
+ inst->setCanCommit();
+
+ instQueue.advanceTail(inst);
+
continue;
- } else if (instQueue.isFull()) {
- DPRINTF(IEW, "IEW: Issue: IQ has become full.\n");
- // Call function to start blocking.
- block();
- // Tell previous stage to stall.
- toRename->iewInfo.stall = true;
- break;
}
// If the instruction queue is not full, then add the
// instruction.
instQueue.insert(fromRename->insts[inst_num]);
+
+ ++iewDispatchedInsts;
}
}
+}
- // Have the instruction queue try to schedule any ready instructions.
- instQueue.scheduleReadyInsts();
-
+template <class Impl, class IQ>
+void
+SimpleIEW<Impl, IQ>::executeInsts()
+{
////////////////////////////////////////
//EXECUTE/WRITEBACK stage
////////////////////////////////////////
@@ -389,9 +407,10 @@ SimpleIEW<Impl, IQ>::iew()
// Execute/writeback any instructions that are available.
for (int inst_num = 0;
fu_usage < executeWidth && /* Haven't exceeded available FU's. */
- inst_num < issueWidth && /* Haven't exceeded issue width. */
- fromIssue->insts[inst_num]; /* There are available instructions. */
+ inst_num < issueWidth &&
+ fromIssue->insts[inst_num];
++inst_num) {
+
DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n");
// Get instruction from issue's queue.
@@ -410,6 +429,8 @@ SimpleIEW<Impl, IQ>::iew()
toCommit->insts[inst_num] = inst;
+ ++iewExecSquashedInsts;
+
continue;
}
@@ -428,14 +449,20 @@ SimpleIEW<Impl, IQ>::iew()
// Tell the LDSTQ to execute this instruction (if it is a load).
if (inst->isLoad()) {
ldstQueue.executeLoad(inst);
+
+ ++iewExecLoadInsts;
} else if (inst->isStore()) {
ldstQueue.executeStore();
+
+ ++iewExecStoreInsts;
} else {
panic("IEW: Unexpected memory type!\n");
}
} else {
inst->execute();
+
+ ++iewExecutedInsts;
}
// First check the time slot that this instruction will write
@@ -464,25 +491,148 @@ SimpleIEW<Impl, IQ>::iew()
inst->nextPC);
// If incorrect, then signal the ROB that it must be squashed.
- squash(inst);
+ squashDueToBranch(inst);
+
+ if (inst->predTaken()) {
+ predictedTakenIncorrect++;
+ }
} else if (ldstQueue.violation()) {
fetch_redirect = true;
+ // Get the DynInst that caused the violation.
DynInstPtr violator = ldstQueue.getMemDepViolator();
DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: "
"%#x, inst PC: %#x. Addr is: %#x.\n",
violator->readPC(), inst->readPC(), inst->physEffAddr);
+ // Tell the instruction queue that a violation has occured.
instQueue.violation(inst, violator);
- squash(inst);
- // Otherwise check if there was a memory ordering violation.
- // If there was, then signal ROB that it must be squashed. Also
- // signal IQ that there was a violation.
+ // Squash.
+ squashDueToMem(inst);
+
+ ++memOrderViolationEvents;
}
}
}
+}
+
+template<class Impl, class IQ>
+void
+SimpleIEW<Impl, IQ>::tick()
+{
+ // Considering putting all the state-determining stuff in this section.
+
+ // Try to fill up issue queue with as many instructions as bandwidth
+ // allows.
+ // Decode should try to execute as many instructions as its bandwidth
+ // will allow, as long as it is not currently blocked.
+
+ // Check if the stage is in a running status.
+ if (_status != Blocked && _status != Squashing) {
+ DPRINTF(IEW, "IEW: Status is not blocked, attempting to run "
+ "stage.\n");
+ iew();
+
+ // If it's currently unblocking, check to see if it should switch
+ // to running.
+ if (_status == Unblocking) {
+ unblock();
+
+ ++iewUnblockCycles;
+ }
+ } else if (_status == Squashing) {
+
+ DPRINTF(IEW, "IEW: Still squashing.\n");
+
+ // Check if stage should remain squashing. Stop squashing if the
+ // squash signal clears.
+ if (!fromCommit->commitInfo.squash &&
+ !fromCommit->commitInfo.robSquashing) {
+ DPRINTF(IEW, "IEW: Done squashing, changing status to "
+ "running.\n");
+
+ _status = Running;
+ instQueue.stopSquash();
+ } else {
+ instQueue.doSquash();
+ }
+
+ ++iewSquashCycles;
+
+ // Also should advance its own time buffers if the stage ran.
+ // Not sure about this...
+// issueToExecQueue.advance();
+ } else if (_status == Blocked) {
+ // Continue to tell previous stage to stall.
+ toRename->iewInfo.stall = true;
+
+ // Check if possible stall conditions have cleared.
+ if (!fromCommit->commitInfo.stall &&
+ !instQueue.isFull()) {
+ DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n");
+ _status = Unblocking;
+ }
+
+ // If there's still instructions coming from rename, continue to
+ // put them on the skid buffer.
+ if (fromRename->size == 0) {
+ block();
+ }
+
+ if (fromCommit->commitInfo.squash ||
+ fromCommit->commitInfo.robSquashing) {
+ squash();
+ }
+
+ ++iewBlockCycles;
+ }
+
+ // @todo: Maybe put these at the beginning, so if it's idle it can
+ // return early.
+ // Write back number of free IQ entries here.
+ toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();
+
+ // Check the committed load/store signals to see if there's a load
+ // or store to commit. Also check if it's being told to execute a
+ // nonspeculative instruction.
+ if (fromCommit->commitInfo.commitIsStore) {
+ ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
+ } else if (fromCommit->commitInfo.commitIsLoad) {
+ ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
+ }
+
+ if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
+ instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
+ }
+
+ DPRINTF(IEW, "IEW: IQ has %i free entries.\n",
+ instQueue.numFreeEntries());
+}
+
+template<class Impl, class IQ>
+void
+SimpleIEW<Impl, IQ>::iew()
+{
+ // Might want to put all state checks in the tick() function.
+ // Check if being told to stall from commit.
+ if (fromCommit->commitInfo.stall) {
+ block();
+ return;
+ } else if (fromCommit->commitInfo.squash ||
+ fromCommit->commitInfo.robSquashing) {
+ // Also check if commit is telling this stage to squash.
+ squash();
+ return;
+ }
+
+ dispatchInsts();
+
+ // Have the instruction queue try to schedule any ready instructions.
+ instQueue.scheduleReadyInsts();
+
+ executeInsts();
// Loop through the head of the time buffer and wake any dependents.
// These instructions are about to write back. In the simple model
@@ -491,7 +641,7 @@ SimpleIEW<Impl, IQ>::iew()
// Also mark scoreboard that this instruction is finally complete.
// Either have IEW have direct access to rename map, or have this as
// part of backwards communication.
- for (int inst_num = 0; inst_num < executeWidth &&
+ for (int inst_num = 0; inst_num < issueWidth &&
toCommit->insts[inst_num]; inst_num++)
{
DynInstPtr inst = toCommit->insts[inst_num];
diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc
index 43b0a4572..c4fd077bc 100644
--- a/cpu/beta_cpu/inst_queue.cc
+++ b/cpu/beta_cpu/inst_queue.cc
@@ -5,3 +5,6 @@
// Force instantiation of InstructionQueue.
template InstructionQueue<AlphaSimpleImpl>;
+
+unsigned
+InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0;
diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh
index a170979cb..6fcce70a4 100644
--- a/cpu/beta_cpu/inst_queue.hh
+++ b/cpu/beta_cpu/inst_queue.hh
@@ -7,14 +7,10 @@
#include <stdint.h>
#include <vector>
+#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
-//Perhaps have a better separation between the data structure underlying
-//and the actual algorithm.
-//somewhat nasty to try to have a nice ordering.
-// Consider moving to STL list or slist for the LL stuff.
-
/**
* A standard instruction queue class. It holds instructions in an
* array, holds the ordering of the instructions within a linked list,
@@ -74,6 +70,8 @@ class InstructionQueue
InstructionQueue(Params &params);
+ void regStats();
+
void setCPU(FullCPU *cpu);
void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
@@ -98,6 +96,7 @@ class InstructionQueue
void violation(DynInstPtr &store, DynInstPtr &faulting_load);
+ // Change this to take in the sequence number
void squash();
void doSquash();
@@ -159,7 +158,7 @@ class InstructionQueue
ReadyInstQueue readyBranchInsts;
/** List of ready memory instructions. */
- ReadyInstQueue readyMemInsts;
+// ReadyInstQueue readyMemInsts;
/** List of ready miscellaneous instructions. */
ReadyInstQueue readyMiscInsts;
@@ -228,9 +227,6 @@ class InstructionQueue
/** The sequence number of the squashed instruction. */
InstSeqNum squashedSeqNum;
- /** Iterator that points to the oldest instruction in the IQ. */
-// ListIt head;
-
/** Iterator that points to the youngest instruction in the IQ. */
ListIt tail;
@@ -261,6 +257,9 @@ class InstructionQueue
void insert(DynInstPtr &new_inst);
void remove(DynInstPtr &inst_to_remove);
+
+ // Debug variable, remove when done testing.
+ static unsigned mem_alloc_counter;
};
/** Array of linked lists. Each linked list is a list of all the
@@ -285,6 +284,25 @@ class InstructionQueue
void dumpDependGraph();
void addIfReady(DynInstPtr &inst);
+
+ Stats::Scalar<> iqInstsAdded;
+ Stats::Scalar<> iqNonSpecInstsAdded;
+// Stats::Scalar<> iqIntInstsAdded;
+ Stats::Scalar<> iqIntInstsIssued;
+// Stats::Scalar<> iqFloatInstsAdded;
+ Stats::Scalar<> iqFloatInstsIssued;
+// Stats::Scalar<> iqBranchInstsAdded;
+ Stats::Scalar<> iqBranchInstsIssued;
+// Stats::Scalar<> iqMemInstsAdded;
+ Stats::Scalar<> iqMemInstsIssued;
+// Stats::Scalar<> iqMiscInstsAdded;
+ Stats::Scalar<> iqMiscInstsIssued;
+ Stats::Scalar<> iqSquashedInstsIssued;
+ Stats::Scalar<> iqLoopSquashStalls;
+ Stats::Scalar<> iqSquashedInstsExamined;
+ Stats::Scalar<> iqSquashedOperandsExamined;
+ Stats::Scalar<> iqSquashedNonSpecRemoved;
+
};
#endif //__INST_QUEUE_HH__
diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh
index 03e3fed33..c688181ed 100644
--- a/cpu/beta_cpu/inst_queue_impl.hh
+++ b/cpu/beta_cpu/inst_queue_impl.hh
@@ -24,15 +24,13 @@ InstructionQueue<Impl>::InstructionQueue(Params &params)
numEntries(params.numIQEntries),
intWidth(params.executeIntWidth),
floatWidth(params.executeFloatWidth),
+ branchWidth(params.executeBranchWidth),
+ memoryWidth(params.executeMemoryWidth),
totalWidth(params.issueWidth),
numPhysIntRegs(params.numPhysIntRegs),
numPhysFloatRegs(params.numPhysFloatRegs),
commitToIEWDelay(params.commitToIEWDelay)
{
- // HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER.
- branchWidth = 1;
- memoryWidth = 1;
-
DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth);
// Initialize the number of free IQ entries.
@@ -68,6 +66,87 @@ InstructionQueue<Impl>::InstructionQueue(Params &params)
template <class Impl>
void
+InstructionQueue<Impl>::regStats()
+{
+ iqInstsAdded
+ .name(name() + ".iqInstsAdded")
+ .desc("Number of instructions added to the IQ (excludes non-spec)")
+ .prereq(iqInstsAdded);
+
+ iqNonSpecInstsAdded
+ .name(name() + ".iqNonSpecInstsAdded")
+ .desc("Number of non-speculative instructions added to the IQ")
+ .prereq(iqNonSpecInstsAdded);
+
+// iqIntInstsAdded;
+
+ iqIntInstsIssued
+ .name(name() + ".iqIntInstsIssued")
+ .desc("Number of integer instructions issued")
+ .prereq(iqIntInstsIssued);
+
+// iqFloatInstsAdded;
+
+ iqFloatInstsIssued
+ .name(name() + ".iqFloatInstsIssued")
+ .desc("Number of float instructions issued")
+ .prereq(iqFloatInstsIssued);
+
+// iqBranchInstsAdded;
+
+ iqBranchInstsIssued
+ .name(name() + ".iqBranchInstsIssued")
+ .desc("Number of branch instructions issued")
+ .prereq(iqBranchInstsIssued);
+
+// iqMemInstsAdded;
+
+ iqMemInstsIssued
+ .name(name() + ".iqMemInstsIssued")
+ .desc("Number of memory instructions issued")
+ .prereq(iqMemInstsIssued);
+
+// iqMiscInstsAdded;
+
+ iqMiscInstsIssued
+ .name(name() + ".iqMiscInstsIssued")
+ .desc("Number of miscellaneous instructions issued")
+ .prereq(iqMiscInstsIssued);
+
+ iqSquashedInstsIssued
+ .name(name() + ".iqSquashedInstsIssued")
+ .desc("Number of squashed instructions issued")
+ .prereq(iqSquashedInstsIssued);
+
+ iqLoopSquashStalls
+ .name(name() + ".iqLoopSquashStalls")
+ .desc("Number of times issue loop had to restart due to squashed "
+ "inst; mainly for profiling")
+ .prereq(iqLoopSquashStalls);
+
+ iqSquashedInstsExamined
+ .name(name() + ".iqSquashedInstsExamined")
+ .desc("Number of squashed instructions iterated over during squash;"
+ " mainly for profiling")
+ .prereq(iqSquashedInstsExamined);
+
+ iqSquashedOperandsExamined
+ .name(name() + ".iqSquashedOperandsExamined")
+ .desc("Number of squashed operands that are examined and possibly "
+ "removed from graph")
+ .prereq(iqSquashedOperandsExamined);
+
+ iqSquashedNonSpecRemoved
+ .name(name() + ".iqSquashedNonSpecRemoved")
+ .desc("Number of squashed non-spec instructions that were removed")
+ .prereq(iqSquashedNonSpecRemoved);
+
+ // Tell mem dependence unit to reg stats as well.
+ memDepUnit.regStats();
+}
+
+template <class Impl>
+void
InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr)
{
cpu = cpu_ptr;
@@ -161,10 +240,14 @@ InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
// unit.
if (new_inst->isMemRef()) {
memDepUnit.insert(new_inst);
+ // Uh..forgot to look it up and put it on the proper dependency list
+ // if the instruction should not go yet.
+ } else {
+ // If the instruction is ready then add it to the ready list.
+ addIfReady(new_inst);
}
- // If the instruction is ready then add it to the ready list.
- addIfReady(new_inst);
+ ++iqInstsAdded;
assert(freeEntries == (numEntries - countInsts()));
}
@@ -219,13 +302,16 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst)
// If it's a memory instruction, add it to the memory dependency
// unit.
if (inst->isMemRef()) {
- memDepUnit.insert(inst);
+ memDepUnit.insertNonSpec(inst);
}
+
+ ++iqNonSpecInstsAdded;
}
// Slightly hack function to advance the tail iterator in the case that
// the IEW stage issues an instruction that is not added to the IQ. This
// is needed in case a long chain of such instructions occurs.
+// I don't think this is used anymore.
template <class Impl>
void
InstructionQueue<Impl>::advanceTail(DynInstPtr &inst)
@@ -288,7 +374,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
bool insts_available = !readyBranchInsts.empty() ||
!readyIntInsts.empty() ||
!readyFloatInsts.empty() ||
- !readyMemInsts.empty() ||
+ !memDepUnit.empty() ||
!readyMiscInsts.empty() ||
!squashedInsts.empty();
@@ -327,6 +413,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
if (int_head_inst->isSquashed()) {
readyIntInsts.pop();
+
+ ++iqLoopSquashStalls;
+
continue;
}
@@ -344,6 +433,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
if (float_head_inst->isSquashed()) {
readyFloatInsts.pop();
+
+ ++iqLoopSquashStalls;
+
continue;
} else if (float_head_inst->seqNum < oldest_inst) {
oldest_inst = float_head_inst->seqNum;
@@ -361,6 +453,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
if (branch_head_inst->isSquashed()) {
readyBranchInsts.pop();
+
+ ++iqLoopSquashStalls;
+
continue;
} else if (branch_head_inst->seqNum < oldest_inst) {
oldest_inst = branch_head_inst->seqNum;
@@ -370,15 +465,18 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
- if (!readyMemInsts.empty() &&
+ if (!memDepUnit.empty() &&
memory_issued < memoryWidth) {
insts_available = true;
- mem_head_inst = readyMemInsts.top();
+ mem_head_inst = memDepUnit.top();
if (mem_head_inst->isSquashed()) {
- readyMemInsts.pop();
+ memDepUnit.pop();
+
+ ++iqLoopSquashStalls;
+
continue;
} else if (mem_head_inst->seqNum < oldest_inst) {
oldest_inst = mem_head_inst->seqNum;
@@ -395,6 +493,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
if (misc_head_inst->isSquashed()) {
readyMiscInsts.pop();
+
+ ++iqLoopSquashStalls;
+
continue;
} else if (misc_head_inst->seqNum < oldest_inst) {
oldest_inst = misc_head_inst->seqNum;
@@ -450,9 +551,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
case Memory:
issuing_inst = mem_head_inst;
- memDepUnit.issue(mem_head_inst);
-
- readyMemInsts.pop();
+ memDepUnit.pop();
++memory_issued;
DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n",
issuing_inst->readPC());
@@ -461,6 +560,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
case Misc:
issuing_inst = misc_head_inst;
readyMiscInsts.pop();
+
+ ++iqMiscInstsIssued;
+
DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n",
issuing_inst->readPC());
break;
@@ -476,6 +578,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
if (list_with_oldest != None) {
i2e_info->insts[total_issued] = issuing_inst;
+ i2e_info->size++;
issuing_inst->setIssued();
@@ -485,12 +588,21 @@ InstructionQueue<Impl>::scheduleReadyInsts()
assert(freeEntries == (numEntries - countInsts()));
}
+
+ iqIntInstsIssued += int_issued;
+ iqFloatInstsIssued += float_issued;
+ iqBranchInstsIssued += branch_issued;
+ iqMemInstsIssued += memory_issued;
+ iqSquashedInstsIssued += squashed_issued;
}
template <class Impl>
void
InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
{
+ DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence "
+ "number %i as ready to execute.\n", inst);
+
non_spec_it_t inst_it = nonSpecInsts.find(inst);
assert(inst_it != nonSpecInsts.end());
@@ -499,7 +611,11 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
(*inst_it).second->setCanIssue();
// Now schedule the instruction.
- addIfReady((*inst_it).second);
+ if (!(*inst_it).second->isMemRef()) {
+ addIfReady((*inst_it).second);
+ } else {
+ memDepUnit.nonSpecInstReady((*inst_it).second);
+ }
nonSpecInsts.erase(inst_it);
}
@@ -552,6 +668,7 @@ InstructionQueue<Impl>::doSquash()
// hasn't already been squashed in the IQ.
if (!squashed_inst->isIssued() &&
!squashed_inst->isSquashedInIQ()) {
+
// Remove the instruction from the dependency list.
// Hack for now: These below don't add themselves to the
// dependency list, so don't try to remove them.
@@ -576,7 +693,15 @@ InstructionQueue<Impl>::doSquash()
src_reg < numPhysRegs) {
dependGraph[src_reg].remove(squashed_inst);
}
+
+ ++iqSquashedOperandsExamined;
}
+
+ // Might want to remove producers as well.
+ } else {
+ nonSpecInsts.erase(squashed_inst->seqNum);
+
+ ++iqSquashedNonSpecRemoved;
}
// Might want to also clear out the head of the dependency graph.
@@ -590,11 +715,8 @@ InstructionQueue<Impl>::doSquash()
squashed_inst->readPC());
}
- if (squashed_inst->isNonSpeculative() || squashed_inst->isStore()) {
- nonSpecInsts.erase(squashed_inst->seqNum);
- }
-
--squashIt;
+ ++iqSquashedInstsExamined;
}
}
@@ -665,6 +787,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
dependGraph[dest_reg].next = curr->next;
+ DependencyEntry::mem_alloc_counter--;
+
delete curr;
}
@@ -749,13 +873,9 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
}
dependGraph[dest_reg].inst = new_inst;
-#if 0
- if (dependGraph[dest_reg].next) {
- panic("Dependency chain of dest reg %i is not empty.\n",
- dest_reg);
- }
-#endif
+
assert(!dependGraph[dest_reg].next);
+
// Mark the scoreboard to say it's not yet ready.
regScoreboard[dest_reg] = false;
}
@@ -776,6 +896,8 @@ InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
// Then actually add it to the chain.
this->next = new_entry;
+
+ ++mem_alloc_counter;
}
template <class Impl>
@@ -805,6 +927,8 @@ InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
// Now remove this instruction from the list.
prev->next = curr->next;
+ --mem_alloc_counter;
+
delete curr;
}
@@ -855,12 +979,26 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n");
+ // Message to the mem dependence unit that this instruction has
+ // its registers ready.
+
+ memDepUnit.regsReady(inst);
+
+#if 0
if (memDepUnit.readyToIssue(inst)) {
DPRINTF(IQ, "IQ: Memory instruction is ready to issue, "
"putting it onto the ready list, PC %#x.\n",
inst->readPC());
readyMemInsts.push(inst);
+ } else {
+ // Make dependent on the store.
+ // Will need some way to get the store instruction it should
+ // be dependent upon; then when the store issues it can
+ // put the instruction on the ready list.
+ // Yet another tree?
+ assert(0 && "Instruction has no way to actually issue");
}
+#endif
} else if (inst->isInteger()) {
@@ -923,7 +1061,7 @@ InstructionQueue<Impl>::dumpLists()
cprintf("Ready branch list size: %i\n", readyBranchInsts.size());
- cprintf("Ready memory list size: %i\n", readyMemInsts.size());
+// cprintf("Ready memory list size: %i\n", readyMemInsts.size());
cprintf("Ready misc list size: %i\n", readyMiscInsts.size());
diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh
index 4821c63b7..e43543e09 100644
--- a/cpu/beta_cpu/mem_dep_unit.hh
+++ b/cpu/beta_cpu/mem_dep_unit.hh
@@ -6,6 +6,7 @@
#include <map>
#include "cpu/inst_seq.hh"
+#include "base/statistics.hh"
/**
* Memory dependency unit class. This holds the memory dependence predictor.
@@ -25,16 +26,17 @@ class MemDepUnit {
typedef typename Impl::DynInstPtr DynInstPtr;
public:
- typedef typename std::set<InstSeqNum>::iterator sn_it_t;
- typedef typename std::map<InstSeqNum, vector<InstSeqNum> >::iterator
- dep_it_t;
-
- public:
MemDepUnit(Params &params);
+ void regStats();
+
void insert(DynInstPtr &inst);
- bool readyToIssue(DynInstPtr &inst);
+ void insertNonSpec(DynInstPtr &inst);
+
+ void regsReady(DynInstPtr &inst);
+
+ void nonSpecInstReady(DynInstPtr &inst);
void issue(DynInstPtr &inst);
@@ -44,19 +46,83 @@ class MemDepUnit {
void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
+ // Will want to make this operation relatively fast. Right now it
+ // kind of sucks.
+ DynInstPtr &top();
+
+ void pop();
+
+ inline bool empty()
+ { return readyInsts.empty(); }
+
+ private:
+ typedef typename std::set<InstSeqNum>::iterator sn_it_t;
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator dyn_it_t;
+
+ // Forward declarations so that the following two typedefs work.
+ class Dependency;
+ class ltDependency;
+
+ typedef typename std::set<Dependency, ltDependency>::iterator dep_it_t;
+ typedef typename std::map<InstSeqNum, vector<dep_it_t> >::iterator
+ sd_it_t;
+
+ struct Dependency {
+ Dependency(const InstSeqNum &_seqNum)
+ : seqNum(_seqNum), regsReady(0), memDepReady(0)
+ { }
+
+ Dependency(const InstSeqNum &_seqNum, bool _regsReady,
+ bool _memDepReady)
+ : seqNum(_seqNum), regsReady(_regsReady),
+ memDepReady(_memDepReady)
+ { }
+
+ InstSeqNum seqNum;
+ mutable bool regsReady;
+ mutable bool memDepReady;
+ mutable sd_it_t storeDep;
+ };
+
+ struct ltDependency {
+ bool operator() (const Dependency &lhs, const Dependency &rhs)
+ {
+ return lhs.seqNum < rhs.seqNum;
+ }
+ };
+
+
+ private:
+ inline void moveToReady(dep_it_t &woken_inst);
+
private:
/** List of instructions that have passed through rename, yet are still
- * waiting on a memory dependence to resolve before they can issue.
+ * waiting on either a memory dependence to resolve or source registers to
+ * become available before they can issue.
*/
- std::set<InstSeqNum> renamedInsts;
+ std::set<Dependency, ltDependency> waitingInsts;
/** List of instructions that have all their predicted memory dependences
- * resolved. They are ready in terms of being free of memory
- * dependences; however they may still have to wait on source registers.
+ * resolved and their source registers ready.
*/
std::set<InstSeqNum> readyInsts;
- std::map<InstSeqNum, vector<InstSeqNum> > dependencies;
+ // Change this to hold a vector of iterators, which will point to the
+ // entry of the waiting instructions.
+ /** List of stores' sequence numbers, each of which has a vector of
+ * iterators. The iterators point to the appropriate node within
+ * waitingInsts that has the depenendent instruction.
+ */
+ std::map<InstSeqNum, vector<dep_it_t> > storeDependents;
+
+ // For now will implement this as a map...hash table might not be too
+ // bad, or could move to something that mimics the current dependency
+ // graph.
+ std::map<InstSeqNum, DynInstPtr> memInsts;
+
+ // Iterator pointer to the top instruction which has is ready.
+ // Is set by the top() call.
+ dyn_it_t topInst;
/** The memory dependence predictor. It is accessed upon new
* instructions being added to the IQ, and responds by telling
@@ -65,6 +131,10 @@ class MemDepUnit {
*/
MemDepPred depPred;
+ Stats::Scalar<> insertedLoads;
+ Stats::Scalar<> insertedStores;
+ Stats::Scalar<> conflictingLoads;
+ Stats::Scalar<> conflictingStores;
};
#endif
diff --git a/cpu/beta_cpu/mem_dep_unit_impl.hh b/cpu/beta_cpu/mem_dep_unit_impl.hh
index 4299acb7a..4161ac2a8 100644
--- a/cpu/beta_cpu/mem_dep_unit_impl.hh
+++ b/cpu/beta_cpu/mem_dep_unit_impl.hh
@@ -3,108 +3,301 @@
#include "cpu/beta_cpu/mem_dep_unit.hh"
-// Hack: dependence predictor sizes are hardcoded.
template <class MemDepPred, class Impl>
MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params &params)
- : depPred(4028, 128)
+ : depPred(params.SSITSize, params.LFSTSize)
{
DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n");
}
template <class MemDepPred, class Impl>
void
+MemDepUnit<MemDepPred, Impl>::regStats()
+{
+ insertedLoads
+ .name(name() + ".memDep.insertedLoads")
+ .desc("Number of loads inserted to the mem dependence unit.");
+
+ insertedStores
+ .name(name() + ".memDep.insertedStores")
+ .desc("Number of stores inserted to the mem dependence unit.");
+
+ conflictingLoads
+ .name(name() + ".memDep.conflictingLoads")
+ .desc("Number of conflicting loads.");
+
+ conflictingStores
+ .name(name() + ".memDep.conflictingStores")
+ .desc("Number of conflicting stores.");
+}
+
+template <class MemDepPred, class Impl>
+void
MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
{
InstSeqNum inst_seq_num = inst->seqNum;
+ Dependency unresolved_dependencies(inst_seq_num);
InstSeqNum producing_store = depPred.checkInst(inst->readPC());
if (producing_store == 0 ||
- dependencies.find(producing_store) == dependencies.end()) {
- readyInsts.insert(inst_seq_num);
+ storeDependents.find(producing_store) == storeDependents.end()) {
+
+ DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC "
+ "%#x.\n", inst->readPC());
+
+ unresolved_dependencies.storeDep = storeDependents.end();
+
+ if (inst->readyToIssue()) {
+ readyInsts.insert(inst_seq_num);
+ } else {
+ unresolved_dependencies.memDepReady = true;
+
+ waitingInsts.insert(unresolved_dependencies);
+ }
} else {
+ DPRINTF(MemDepUnit, "MemDepUnit: Adding to dependency list; "
+ "inst PC %#x is dependent on seq num %i.\n",
+ inst->readPC(), producing_store);
+
+ if (inst->readyToIssue()) {
+ unresolved_dependencies.regsReady = true;
+ }
+
+ // Find the store that this instruction is dependent on.
+ sd_it_t store_loc = storeDependents.find(producing_store);
+
+ assert(store_loc != storeDependents.end());
+
+ // Record the location of the store that this instruction is
+ // dependent on.
+ unresolved_dependencies.storeDep = store_loc;
+
// If it's not already ready, then add it to the renamed
// list and the dependencies.
- renamedInsts.insert(inst_seq_num);
+ dep_it_t inst_loc =
+ (waitingInsts.insert(unresolved_dependencies)).first;
+
+ // Add this instruction to the list of dependents.
+ (*store_loc).second.push_back(inst_loc);
+
+ assert(!(*store_loc).second.empty());
- dependencies[producing_store].push_back(inst_seq_num);
+ if (inst->isLoad()) {
+ ++conflictingLoads;
+ } else {
+ ++conflictingStores;
+ }
}
if (inst->isStore()) {
+ DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n",
+ inst->readPC());
+
depPred.insertStore(inst->readPC(), inst_seq_num);
// Make sure this store isn't already in this list.
- assert(dependencies.find(inst_seq_num) == dependencies.end());
+ assert(storeDependents.find(inst_seq_num) == storeDependents.end());
// Put a dependency entry in at the store's sequence number.
// Uh, not sure how this works...I want to create an entry but
// I don't have anything to put into the value yet.
- dependencies[inst_seq_num];
- } else if (!inst->isLoad()) {
+ storeDependents[inst_seq_num];
+
+ assert(storeDependents.size() != 0);
+
+ ++insertedStores;
+
+ } else if (inst->isLoad()) {
+ ++insertedLoads;
+ } else {
+ panic("MemDepUnit: Unknown type! (most likely a barrier).");
+ }
+
+ memInsts[inst_seq_num] = inst;
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
+{
+ InstSeqNum inst_seq_num = inst->seqNum;
+
+ Dependency non_spec_inst(inst_seq_num);
+
+ non_spec_inst.storeDep = storeDependents.end();
+
+ waitingInsts.insert(non_spec_inst);
+
+ // Might want to turn this part into an inline function or something.
+ // It's shared between both insert functions.
+ if (inst->isStore()) {
+ DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n",
+ inst->readPC());
+
+ depPred.insertStore(inst->readPC(), inst_seq_num);
+
+ // Make sure this store isn't already in this list.
+ assert(storeDependents.find(inst_seq_num) == storeDependents.end());
+
+ // Put a dependency entry in at the store's sequence number.
+ // Uh, not sure how this works...I want to create an entry but
+ // I don't have anything to put into the value yet.
+ storeDependents[inst_seq_num];
+
+ assert(storeDependents.size() != 0);
+
+ ++insertedStores;
+
+ } else if (inst->isLoad()) {
+ ++insertedLoads;
+ } else {
panic("MemDepUnit: Unknown type! (most likely a barrier).");
}
+
+ memInsts[inst_seq_num] = inst;
+}
+
+template <class MemDepPred, class Impl>
+typename Impl::DynInstPtr &
+MemDepUnit<MemDepPred, Impl>::top()
+{
+ topInst = memInsts.find( (*readyInsts.begin()) );
+
+ DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n",
+ (*topInst).second->readPC());
+
+ return (*topInst).second;
}
template <class MemDepPred, class Impl>
-bool
-MemDepUnit<MemDepPred, Impl>::readyToIssue(DynInstPtr &inst)
+void
+MemDepUnit<MemDepPred, Impl>::pop()
{
+ DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n",
+ (*topInst).second->readPC());
+
+ wakeDependents((*topInst).second);
+
+ issue((*topInst).second);
+
+ memInsts.erase(topInst);
+
+ topInst = memInsts.end();
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst)
+{
+ DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for "
+ "instruction PC %#x.\n",
+ inst->readPC());
+
InstSeqNum inst_seq_num = inst->seqNum;
- if (readyInsts.find(inst_seq_num) == readyInsts.end()) {
- return false;
+ Dependency inst_to_find(inst_seq_num);
+
+ dep_it_t waiting_inst = waitingInsts.find(inst_to_find);
+
+ assert(waiting_inst != waitingInsts.end());
+
+ if ((*waiting_inst).memDepReady) {
+ DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory "
+ "dependencies resolved, adding it to the ready list.\n");
+
+ moveToReady(waiting_inst);
} else {
- return true;
+ DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on "
+ "memory dependency.\n");
+
+ (*waiting_inst).regsReady = true;
}
}
template <class MemDepPred, class Impl>
void
+MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst)
+{
+ DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative "
+ "instruction PC %#x as ready.\n",
+ inst->readPC());
+
+ InstSeqNum inst_seq_num = inst->seqNum;
+
+ Dependency inst_to_find(inst_seq_num);
+
+ dep_it_t waiting_inst = waitingInsts.find(inst_to_find);
+
+ assert(waiting_inst != waitingInsts.end());
+
+ moveToReady(waiting_inst);
+}
+
+template <class MemDepPred, class Impl>
+void
MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
{
assert(readyInsts.find(inst->seqNum) != readyInsts.end());
+ DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n",
+ inst->readPC());
+
// Remove the instruction from the ready list.
readyInsts.erase(inst->seqNum);
+
+ depPred.issued(inst->readPC(), inst->seqNum, inst->isStore());
}
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
{
+ // Only stores have dependents.
+ if (!inst->isStore()) {
+ return;
+ }
+
// Wake any dependencies.
- dep_it_t dep_it = dependencies.find(inst);
+ sd_it_t sd_it = storeDependents.find(inst->seqNum);
// If there's no entry, then return. Really there should only be
// no entry if the instruction is a load.
- if (dep_it == dependencies.end()) {
+ if (sd_it == storeDependents.end()) {
+ DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence "
+ "number %i has no dependents.\n",
+ inst->readPC(), inst->seqNum);
+
return;
}
- assert(inst->isStore());
-
- for(int i = 0; i < (*dep_it).second.size(); ++i ) {
- InstSeqNum woken_inst = (*dep_it).second[i];
+ for (int i = 0; i < (*sd_it).second.size(); ++i ) {
+ dep_it_t woken_inst = (*sd_it).second[i];
+ DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, "
+ "sequence number %i.\n",
+ (*woken_inst).seqNum);
+#if 0
// Should we have reached instructions that are actually squashed,
// there will be no more useful instructions in this dependency
// list. Break out early.
- if (renamedInsts.find(woken_inst) == renamedInsts.end()) {
+ if (waitingInsts.find(woken_inst) == waitingInsts.end()) {
DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x "
"are squashed, starting at SN %i. Breaking early.\n",
inst->readPC(), woken_inst);
break;
}
+#endif
- // Remove it from the renamed instructions.
- renamedInsts.erase(woken_inst);
-
- // Add it to the ready list.
- readyInsts.insert(woken_inst);
+ if ((*woken_inst).regsReady) {
+ moveToReady(woken_inst);
+ } else {
+ (*woken_inst).memDepReady = true;
+ }
}
- dependencies.erase(dep_it);
+ storeDependents.erase(sd_it);
}
template <class MemDepPred, class Impl>
@@ -112,17 +305,30 @@ void
MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num)
{
- if (!renamedInsts.empty()) {
- sn_it_t renamed_it = renamedInsts.end();
+ if (!waitingInsts.empty()) {
+ dep_it_t waiting_it = waitingInsts.end();
- --renamed_it;
+ --waiting_it;
// Remove entries from the renamed list as long as we haven't reached
// the end and the entries continue to be younger than the squashed.
- while (!renamedInsts.empty() &&
- (*renamed_it) > squashed_num)
+ while (!waitingInsts.empty() &&
+ (*waiting_it).seqNum > squashed_num)
{
- renamedInsts.erase(renamed_it--);
+ if (!(*waiting_it).memDepReady &&
+ (*waiting_it).storeDep != storeDependents.end()) {
+ sd_it_t sd_it = (*waiting_it).storeDep;
+
+ // Make sure the iterator that the store has pointing
+ // back is actually to this instruction.
+ assert((*sd_it).second.back() == waiting_it);
+
+ // Now remove this from the store's list of dependent
+ // instructions.
+ (*sd_it).second.pop_back();
+ }
+
+ waitingInsts.erase(waiting_it--);
}
}
@@ -139,16 +345,19 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num)
}
}
- if (!dependencies.empty()) {
- dep_it_t dep_it = dependencies.end();
+ if (!storeDependents.empty()) {
+ sd_it_t dep_it = storeDependents.end();
--dep_it;
// Same for the dependencies list.
- while (!dependencies.empty() &&
+ while (!storeDependents.empty() &&
(*dep_it).first > squashed_num)
{
- dependencies.erase(dep_it--);
+ // This store's list of dependent instructions should be empty.
+ assert((*dep_it).second.empty());
+
+ storeDependents.erase(dep_it--);
}
}
@@ -161,6 +370,23 @@ void
MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst,
DynInstPtr &violating_load)
{
+ DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets,"
+ " load: %#x, store: %#x\n", violating_load->readPC(),
+ store_inst->readPC());
// Tell the memory dependence unit of the violation.
depPred.violation(violating_load->readPC(), store_inst->readPC());
}
+
+template <class MemDepPred, class Impl>
+inline void
+MemDepUnit<MemDepPred, Impl>::moveToReady(dep_it_t &woken_inst)
+{
+ DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i "
+ "to the ready list.\n", (*woken_inst).seqNum);
+
+ // Add it to the ready list.
+ readyInsts.insert((*woken_inst).seqNum);
+
+ // Remove it from the waiting instructions.
+ waitingInsts.erase(woken_inst);
+}
diff --git a/cpu/beta_cpu/ras.cc b/cpu/beta_cpu/ras.cc
new file mode 100644
index 000000000..ca05f5a0d
--- /dev/null
+++ b/cpu/beta_cpu/ras.cc
@@ -0,0 +1,42 @@
+#include "cpu/beta_cpu/ras.hh"
+
+ReturnAddrStack::ReturnAddrStack(unsigned _numEntries)
+ : numEntries(_numEntries), usedEntries(0),
+ tos(0)
+{
+ addrStack = new Addr[numEntries](0);
+}
+
+void
+ReturnAddrStack::push(const Addr &return_addr)
+{
+ incrTos();
+
+ addrStack[tos] = return_addr;
+
+ if (usedEntries != numEntries) {
+ ++usedEntries;
+ }
+}
+
+void
+ReturnAddrStack::pop()
+{
+ // Not sure it's possible to really track usedEntries properly.
+// assert(usedEntries > 0);
+
+ if (usedEntries > 0) {
+ --usedEntries;
+ }
+
+ decrTos();
+}
+
+void
+ReturnAddrStack::restore(unsigned top_entry_idx,
+ const Addr &restored_target)
+{
+ tos = top_entry_idx;
+
+ addrStack[tos] = restored_target;
+}
diff --git a/cpu/beta_cpu/ras.hh b/cpu/beta_cpu/ras.hh
new file mode 100644
index 000000000..7666f825f
--- /dev/null
+++ b/cpu/beta_cpu/ras.hh
@@ -0,0 +1,40 @@
+#ifndef __RAS_HH__
+#define __RAS_HH__
+
+// For Addr type.
+#include "arch/alpha/isa_traits.hh"
+
+class ReturnAddrStack
+{
+ public:
+ ReturnAddrStack(unsigned numEntries);
+
+ Addr top()
+ { return addrStack[tos]; }
+
+ unsigned topIdx()
+ { return tos; }
+
+ void push(const Addr &return_addr);
+
+ void pop();
+
+ void restore(unsigned top_entry_idx, const Addr &restored_target);
+
+ private:
+ inline void incrTos()
+ { tos = (tos + 1) % numEntries; }
+
+ inline void decrTos()
+ { tos = (tos == 0 ? numEntries - 1 : tos - 1); }
+
+ Addr *addrStack;
+
+ unsigned numEntries;
+
+ unsigned usedEntries;
+
+ unsigned tos;
+};
+
+#endif // __RAS_HH__
diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh
index aba897fdc..148d9408a 100644
--- a/cpu/beta_cpu/regfile.hh
+++ b/cpu/beta_cpu/regfile.hh
@@ -54,7 +54,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs);
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
DPRINTF(IEW, "RegFile: Access to float register %i as single, has "
"data %8.8f\n", int(reg_idx), (float)floatRegFile[reg_idx].d);
@@ -67,7 +67,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs);
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
DPRINTF(IEW, "RegFile: Access to float register %i as double, has "
" data %8.8f\n", int(reg_idx), floatRegFile[reg_idx].d);
@@ -80,7 +80,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs);
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
DPRINTF(IEW, "RegFile: Access to float register %i as int, has data "
"%lli\n", int(reg_idx), floatRegFile[reg_idx].q);
@@ -103,7 +103,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs);
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n",
int(reg_idx), val);
@@ -116,7 +116,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs);
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n",
int(reg_idx), val);
@@ -129,7 +129,7 @@ class PhysRegFile
// Remove the base Float reg dependency.
reg_idx = reg_idx - numPhysicalIntRegs;
- assert(reg_idx < numPhysicalFloatRegs);
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
int(reg_idx), val);
diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh
index 9f031012a..3e6b873ae 100644
--- a/cpu/beta_cpu/rename.hh
+++ b/cpu/beta_cpu/rename.hh
@@ -54,6 +54,8 @@ class SimpleRename
public:
SimpleRename(Params &params);
+ void regStats();
+
void setCPU(FullCPU *cpu_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
@@ -182,6 +184,22 @@ class SimpleRename
* group of instructions, it can restart at the proper instruction.
*/
unsigned numInst;
+
+ Stats::Scalar<> renameSquashCycles;
+ Stats::Scalar<> renameIdleCycles;
+ Stats::Scalar<> renameBlockCycles;
+ Stats::Scalar<> renameUnblockCycles;
+ Stats::Scalar<> renameRenamedInsts;
+ Stats::Scalar<> renameSquashedInsts;
+ Stats::Scalar<> renameROBFullEvents;
+ Stats::Scalar<> renameIQFullEvents;
+ Stats::Scalar<> renameFullRegistersEvents;
+ Stats::Scalar<> renameRenamedOperands;
+ Stats::Scalar<> renameRenameLookups;
+ Stats::Scalar<> renameHBPlaceHolders;
+ Stats::Scalar<> renameCommittedMaps;
+ Stats::Scalar<> renameUndoneMaps;
+ Stats::Scalar<> renameValidUndoneMaps;
};
#endif // __SIMPLE_RENAME_HH__
diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh
index 47464d961..5a8e499e9 100644
--- a/cpu/beta_cpu/rename_impl.hh
+++ b/cpu/beta_cpu/rename_impl.hh
@@ -16,6 +16,72 @@ SimpleRename<Impl>::SimpleRename(Params &params)
template <class Impl>
void
+SimpleRename<Impl>::regStats()
+{
+ renameSquashCycles
+ .name(name() + ".renameSquashCycles")
+ .desc("Number of cycles rename is squashing")
+ .prereq(renameSquashCycles);
+ renameIdleCycles
+ .name(name() + ".renameIdleCycles")
+ .desc("Number of cycles rename is idle")
+ .prereq(renameIdleCycles);
+ renameBlockCycles
+ .name(name() + ".renameBlockCycles")
+ .desc("Number of cycles rename is blocking")
+ .prereq(renameBlockCycles);
+ renameUnblockCycles
+ .name(name() + ".renameUnblockCycles")
+ .desc("Number of cycles rename is unblocking")
+ .prereq(renameUnblockCycles);
+ renameRenamedInsts
+ .name(name() + ".renameRenamedInsts")
+ .desc("Number of instructions processed by rename")
+ .prereq(renameRenamedInsts);
+ renameSquashedInsts
+ .name(name() + ".renameSquashedInsts")
+ .desc("Number of squashed instructions processed by rename")
+ .prereq(renameSquashedInsts);
+ renameROBFullEvents
+ .name(name() + ".renameROBFullEvents")
+ .desc("Number of times rename has considered the ROB 'full'")
+ .prereq(renameROBFullEvents);
+ renameIQFullEvents
+ .name(name() + ".renameIQFullEvents")
+ .desc("Number of times rename has considered the IQ 'full'")
+ .prereq(renameIQFullEvents);
+ renameFullRegistersEvents
+ .name(name() + ".renameFullRegisterEvents")
+ .desc("Number of times there has been no free registers")
+ .prereq(renameFullRegistersEvents);
+ renameRenamedOperands
+ .name(name() + ".renameRenamedOperands")
+ .desc("Number of destination operands rename has renamed")
+ .prereq(renameRenamedOperands);
+ renameRenameLookups
+ .name(name() + ".renameRenameLookups")
+ .desc("Number of register rename lookups that rename has made")
+ .prereq(renameRenameLookups);
+ renameHBPlaceHolders
+ .name(name() + ".renameHBPlaceHolders")
+ .desc("Number of place holders added to the history buffer")
+ .prereq(renameHBPlaceHolders);
+ renameCommittedMaps
+ .name(name() + ".renameCommittedMaps")
+ .desc("Number of HB maps that are committed")
+ .prereq(renameCommittedMaps);
+ renameUndoneMaps
+ .name(name() + ".renameUndoneMaps")
+ .desc("Number of HB maps that are undone due to squashing")
+ .prereq(renameUndoneMaps);
+ renameValidUndoneMaps
+ .name(name() + ".renameValidUndoneMaps")
+ .desc("Number of HB maps that are undone, and are not place holders")
+ .prereq(renameValidUndoneMaps);
+}
+
+template <class Impl>
+void
SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr)
{
DPRINTF(Rename, "Rename: Setting CPU pointer.\n");
@@ -59,7 +125,6 @@ SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
// Setup wire to get information from decode.
fromDecode = decodeQueue->getWire(-decodeToRenameDelay);
-
}
template <class Impl>
@@ -124,7 +189,7 @@ SimpleRename<Impl>::unblock()
// continue to tell previous stages to stall. They will be
// able to restart once the skid buffer is empty.
if (!skidBuffer.empty()) {
- toDecode->renameInfo.stall = true;
+ toDecode->renameInfo.stall = true;
} else {
DPRINTF(Rename, "Rename: Done unblocking.\n");
_status = Running;
@@ -136,7 +201,6 @@ void
SimpleRename<Impl>::doSquash()
{
typename list<RenameHistory>::iterator hb_it = historyBuffer.begin();
-// typename list<RenameHistory>::iterator delete_it;
InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum;
@@ -154,6 +218,8 @@ SimpleRename<Impl>::doSquash()
// they did and freeing up the registers.
while ((*hb_it).instSeqNum > squashed_seq_num)
{
+ assert(hb_it != historyBuffer.end());
+
DPRINTF(Rename, "Rename: Removing history entry with sequence "
"number %i.\n", (*hb_it).instSeqNum);
@@ -165,15 +231,13 @@ SimpleRename<Impl>::doSquash()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
- }
-// delete_it = hb_it;
-
-// hb_it++;
+ ++renameValidUndoneMaps;
+ }
historyBuffer.erase(hb_it++);
- assert(hb_it != historyBuffer.end());
+ ++renameUndoneMaps;
}
}
@@ -196,9 +260,6 @@ SimpleRename<Impl>::squash()
doSquash();
}
-// In the future, when a SmartPtr is used for DynInst, then this function
-// itself can handle returning the instruction's physical registers to
-// the free list.
template<class Impl>
void
SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
@@ -233,19 +294,20 @@ SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
if (!(*hb_it).placeHolder) {
freeList->addReg((*hb_it).prevPhysReg);
+ ++renameCommittedMaps;
}
historyBuffer.erase(hb_it--);
}
- // Finally free up the previous register of the squashed instruction
+ // Finally free up the previous register of the finished instruction
// itself.
if (!(*hb_it).placeHolder) {
freeList->addReg(hb_it->prevPhysReg);
+ ++renameCommittedMaps;
}
historyBuffer.erase(hb_it);
-
}
template <class Impl>
@@ -263,7 +325,7 @@ SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst)
// Look up the source registers to get the phys. register they've
// been renamed to, and set the sources to those registers.
- RegIndex renamed_reg = renameMap->lookup(src_reg);
+ PhysRegIndex renamed_reg = renameMap->lookup(src_reg);
DPRINTF(Rename, "Rename: Looking up arch reg %i, got "
"physical reg %i.\n", (int)src_reg, (int)renamed_reg);
@@ -278,6 +340,8 @@ SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst)
inst->markSrcRegReady(src_idx);
}
+
+ ++renameRenameLookups;
}
}
@@ -289,40 +353,6 @@ SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst)
unsigned num_dest_regs = inst->numDestRegs();
- // Rename the destination registers.
- for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++)
- {
- RegIndex dest_reg = inst->destRegIdx(dest_idx);
-
- // Get the physical register that the destination will be
- // renamed to.
- rename_result = renameMap->rename(dest_reg);
-
- DPRINTF(Rename, "Rename: Renaming arch reg %i to physical "
- "reg %i.\n", (int)dest_reg,
- (int)rename_result.first);
-
- // Record the rename information so that a history can be kept.
- RenameHistory hb_entry(inst->seqNum, dest_reg,
- rename_result.first,
- rename_result.second);
-
- historyBuffer.push_front(hb_entry);
-
- DPRINTF(Rename, "Rename: Adding instruction to history buffer, "
- "sequence number %lli.\n",
- (*historyBuffer.begin()).instSeqNum);
-
- // Tell the instruction to rename the appropriate destination
- // register (dest_idx) to the new physical register
- // (rename_result.first), and record the previous physical
- // register that the same logical register was renamed to
- // (rename_result.second).
- inst->renameDestReg(dest_idx,
- rename_result.first,
- rename_result.second);
- }
-
// If it's an instruction with no destination registers, then put
// a placeholder within the history buffer. It might be better
// to not put it in the history buffer at all (other than branches,
@@ -337,6 +367,45 @@ SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst)
DPRINTF(Rename, "Rename: Adding placeholder instruction to "
"history buffer, sequence number %lli.\n",
inst->seqNum);
+
+ ++renameHBPlaceHolders;
+ } else {
+
+ // Rename the destination registers.
+ for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++)
+ {
+ RegIndex dest_reg = inst->destRegIdx(dest_idx);
+
+ // Get the physical register that the destination will be
+ // renamed to.
+ rename_result = renameMap->rename(dest_reg);
+
+ DPRINTF(Rename, "Rename: Renaming arch reg %i to physical "
+ "reg %i.\n", (int)dest_reg,
+ (int)rename_result.first);
+
+ // Record the rename information so that a history can be kept.
+ RenameHistory hb_entry(inst->seqNum, dest_reg,
+ rename_result.first,
+ rename_result.second);
+
+ historyBuffer.push_front(hb_entry);
+
+ DPRINTF(Rename, "Rename: Adding instruction to history buffer, "
+ "sequence number %lli.\n",
+ (*historyBuffer.begin()).instSeqNum);
+
+ // Tell the instruction to rename the appropriate destination
+ // register (dest_idx) to the new physical register
+ // (rename_result.first), and record the previous physical
+ // register that the same logical register was renamed to
+ // (rename_result.second).
+ inst->renameDestReg(dest_idx,
+ rename_result.first,
+ rename_result.second);
+
+ ++renameRenamedOperands;
+ }
}
}
@@ -379,6 +448,8 @@ SimpleRename<Impl>::tick()
// buffer were used. Remove those instructions and handle
// the rest of unblocking.
if (_status == Unblocking) {
+ ++renameUnblockCycles;
+
if (fromDecode->size > 0) {
// Add the current inputs onto the skid buffer, so they can be
// reprocessed when this stage unblocks.
@@ -388,6 +459,8 @@ SimpleRename<Impl>::tick()
unblock();
}
} else if (_status == Blocked) {
+ ++renameBlockCycles;
+
// If stage is blocked and still receiving valid instructions,
// make sure to store them in the skid buffer.
if (fromDecode->size > 0) {
@@ -425,6 +498,8 @@ SimpleRename<Impl>::tick()
return;
}
} else if (_status == Squashing) {
+ ++renameSquashCycles;
+
if (fromCommit->commitInfo.squash) {
squash();
} else if (!fromCommit->commitInfo.squash &&
@@ -439,7 +514,13 @@ SimpleRename<Impl>::tick()
// Ugly code, revamp all of the tick() functions eventually.
if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) {
+#ifndef FULL_SYSTEM
+ if (!fromCommit->commitInfo.squash) {
+ removeFromHistory(fromCommit->commitInfo.doneSeqNum);
+ }
+#else
removeFromHistory(fromCommit->commitInfo.doneSeqNum);
+#endif
}
// Perhaps put this outside of this function, since this will
@@ -539,6 +620,12 @@ SimpleRename<Impl>::rename()
// Tell previous stage to stall.
toDecode->renameInfo.stall = true;
+ if (free_rob_entries <= 0) {
+ ++renameROBFullEvents;
+ } else {
+ ++renameIQFullEvents;
+ }
+
return;
} else if (min_iq_rob < insts_available) {
DPRINTF(Rename, "Rename: Will have to block this cycle. Only "
@@ -548,6 +635,12 @@ SimpleRename<Impl>::rename()
insts_available = min_iq_rob;
block_this_cycle = true;
+
+ if (free_rob_entries < free_iq_entries) {
+ ++renameROBFullEvents;
+ } else {
+ ++renameIQFullEvents;
+ }
}
while (insts_available > 0) {
@@ -566,6 +659,8 @@ SimpleRename<Impl>::rename()
// Go to the next instruction.
++numInst;
+ ++renameSquashedInsts;
+
// Decrement how many instructions are available.
--insts_available;
@@ -606,6 +701,8 @@ SimpleRename<Impl>::rename()
block_this_cycle = true;
+ ++renameFullRegistersEvents;
+
break;
}
@@ -625,6 +722,8 @@ SimpleRename<Impl>::rename()
++to_iew_index;
++numInst;
+ ++renameRenamedInsts;
+
// Decrement how many instructions are available.
--insts_available;
}
diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc
index cb9720d28..1301202f2 100644
--- a/cpu/beta_cpu/rename_map.cc
+++ b/cpu/beta_cpu/rename_map.cc
@@ -72,7 +72,7 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs,
floatRenameMap[index].physical_reg = float_reg_idx++;
}
- for (RegIndex index = numPhysicalIntRegs;
+ for (PhysRegIndex index = numPhysicalIntRegs;
index < numPhysicalIntRegs + numLogicalFloatRegs; ++index)
{
floatScoreboard[index] = 1;
@@ -88,7 +88,7 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs,
}
// Initialize the entries in the misc register scoreboard to be ready.
- for (RegIndex index = numPhysicalRegs;
+ for (PhysRegIndex index = numPhysicalRegs;
index < numPhysicalRegs + numMiscRegs; ++index)
{
miscScoreboard[index] = 1;
diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh
index 862008429..86c4e2db1 100644
--- a/cpu/beta_cpu/rob_impl.hh
+++ b/cpu/beta_cpu/rob_impl.hh
@@ -139,9 +139,7 @@ bool
ROB<Impl>::isHeadReady()
{
if (numInstsInROB != 0) {
- DynInstPtr head_inst = cpu->instList.front();
-
- return head_inst->readyToCommit();
+ return cpu->instList.front()->readyToCommit();
}
return false;
diff --git a/cpu/beta_cpu/store_set.cc b/cpu/beta_cpu/store_set.cc
index 46d763d37..a5458685d 100644
--- a/cpu/beta_cpu/store_set.cc
+++ b/cpu/beta_cpu/store_set.cc
@@ -5,6 +5,8 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
: SSIT_size(_SSIT_size), LFST_size(_LFST_size)
{
DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
+ DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
+ SSIT_size, LFST_size);
SSIT = new SSID[SSIT_size];
@@ -31,11 +33,13 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
}
void
-StoreSet::violation(Addr load_PC, Addr store_PC)
+StoreSet::violation(Addr store_PC, Addr load_PC)
{
int load_index = calcIndex(load_PC);
int store_index = calcIndex(store_PC);
+ assert(load_index < SSIT_size && store_index < SSIT_size);
+
bool valid_load_SSID = validSSIT[load_index];
bool valid_store_SSID = validSSIT[store_index];
@@ -51,7 +55,14 @@ StoreSet::violation(Addr load_PC, Addr store_PC)
SSIT[store_index] = new_set;
+ assert(new_set < LFST_size);
+
SSCounters[new_set]++;
+
+
+ DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid "
+ "storeset, creating a new one: %i for load %#x, store %#x\n",
+ new_set, load_PC, store_PC);
} else if (valid_load_SSID && !valid_store_SSID) {
SSID load_SSID = SSIT[load_index];
@@ -59,7 +70,13 @@ StoreSet::violation(Addr load_PC, Addr store_PC)
SSIT[store_index] = load_SSID;
+ assert(load_SSID < LFST_size);
+
SSCounters[load_SSID]++;
+
+ DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding "
+ "store to that set: %i for load %#x, store %#x\n",
+ load_SSID, load_PC, store_PC);
} else if (!valid_load_SSID && valid_store_SSID) {
SSID store_SSID = SSIT[store_index];
@@ -69,10 +86,16 @@ StoreSet::violation(Addr load_PC, Addr store_PC)
// Because we are having a load point to an already existing set,
// the size of the store set is not incremented.
+
+ DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for "
+ "load %#x, store %#x\n",
+ store_SSID, load_PC, store_PC);
} else {
SSID load_SSID = SSIT[load_index];
SSID store_SSID = SSIT[store_index];
+ assert(load_SSID < LFST_size && store_SSID < LFST_size);
+
int load_SS_size = SSCounters[load_SSID];
int store_SS_size = SSCounters[store_SSID];
@@ -83,11 +106,19 @@ StoreSet::violation(Addr load_PC, Addr store_PC)
SSCounters[load_SSID]++;
SSCounters[store_SSID]--;
+
+ DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; "
+ "for load %#x, store %#x\n",
+ load_SSID, load_PC, store_PC);
} else {
SSIT[load_index] = store_SSID;
SSCounters[store_SSID]++;
SSCounters[load_SSID]--;
+
+ DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; "
+ "for load %#x, store %#x\n",
+ store_SSID, load_PC, store_PC);
}
}
}
@@ -106,6 +137,8 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num)
int store_SSID;
+ assert(index < SSIT_size);
+
if (!validSSIT[index]) {
// Do nothing if there's no valid entry.
return;
@@ -116,6 +149,11 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num)
// Update the last store that was fetched with the current one.
LFST[store_SSID] = store_seq_num;
+
+ validLFST[store_SSID] = 1;
+
+ DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n",
+ store_PC, store_SSID);
}
}
@@ -126,7 +164,12 @@ StoreSet::checkInst(Addr PC)
int inst_SSID;
+ assert(index < SSIT_size);
+
if (!validSSIT[index]) {
+ DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n",
+ PC, index);
+
// Return 0 if there's no valid entry.
return 0;
} else {
@@ -135,8 +178,15 @@ StoreSet::checkInst(Addr PC)
assert(inst_SSID < LFST_size);
if (!validLFST[inst_SSID]) {
+
+ DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no "
+ "dependency\n", PC, index, inst_SSID);
+
return 0;
} else {
+ DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST "
+ "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]);
+
return LFST[inst_SSID];
}
}
@@ -154,14 +204,21 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
int store_SSID;
+ assert(index < SSIT_size);
+
// Make sure the SSIT still has a valid entry for the issued store.
- assert(validSSIT[index]);
+ if (!validSSIT[index]) {
+ return;
+ }
store_SSID = SSIT[index];
+ assert(store_SSID < LFST_size);
+
// If the last fetched store in the store set refers to the store that
// was just issued, then invalidate the entry.
if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) {
+ DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n");
validLFST[store_SSID] = false;
}
}
@@ -170,9 +227,14 @@ void
StoreSet::squash(InstSeqNum squashed_num)
{
// Not really sure how to do this well.
+ // Generally this is small enough that it should be okay; short circuit
+ // evaluation should take care of invalid entries.
+
+ DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
+ squashed_num);
for (int i = 0; i < LFST_size; ++i) {
- if (LFST[i] < squashed_num) {
+ if (validLFST[i] && LFST[i] < squashed_num) {
validLFST[i] = false;
}
}
diff --git a/cpu/beta_cpu/store_set.hh b/cpu/beta_cpu/store_set.hh
index 701c60a2d..b634a180d 100644
--- a/cpu/beta_cpu/store_set.hh
+++ b/cpu/beta_cpu/store_set.hh
@@ -14,7 +14,7 @@ class StoreSet
public:
StoreSet(int SSIT_size, int LFST_size);
- void violation(Addr load_PC, Addr store_PC);
+ void violation(Addr store_PC, Addr load_PC);
void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc
new file mode 100644
index 000000000..53a11326a
--- /dev/null
+++ b/cpu/beta_cpu/tournament_pred.cc
@@ -0,0 +1,243 @@
+#include "cpu/beta_cpu/tournament_pred.hh"
+
+TournamentBP::SatCounter::SatCounter(unsigned bits)
+ : maxVal((1 << bits) - 1), counter(0)
+{
+}
+
+TournamentBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val)
+ : maxVal((1 << bits) - 1), counter(initial_val)
+{
+ // Check to make sure initial value doesn't exceed the max counter value.
+ if (initial_val > maxVal) {
+ panic("BP: Initial counter value exceeds max size.");
+ }
+}
+
+void
+TournamentBP::SatCounter::increment()
+{
+ if (counter < maxVal) {
+ ++counter;
+ }
+}
+
+void
+TournamentBP::SatCounter::decrement()
+{
+ if (counter > 0) {
+ --counter;
+ }
+}
+
+TournamentBP::TournamentBP(unsigned _local_predictor_size,
+ unsigned _local_ctr_bits,
+ unsigned _local_history_table_size,
+ unsigned _local_history_bits,
+ unsigned _global_predictor_size,
+ unsigned _global_ctr_bits,
+ unsigned _global_history_bits,
+ unsigned _choice_predictor_size,
+ unsigned _choice_ctr_bits,
+ unsigned _instShiftAmt)
+ : local_predictor_size(_local_predictor_size),
+ local_ctr_bits(_local_ctr_bits),
+ local_history_table_size(_local_history_table_size),
+ local_history_bits(_local_history_bits),
+ global_predictor_size(_global_predictor_size),
+ global_ctr_bits(_global_ctr_bits),
+ global_history_bits(_global_history_bits),
+ choice_predictor_size(_global_predictor_size),
+ choice_ctr_bits(_choice_ctr_bits),
+ instShiftAmt(_instShiftAmt)
+{
+ //Should do checks here to make sure sizes are correct (powers of 2)
+
+ //Setup the array of counters for the local predictor
+ local_ctrs = new SatCounter[local_predictor_size](local_ctr_bits);
+ //Setup the history table for the local table
+ local_history_table = new unsigned[local_history_table_size](0);
+ // Setup the local history mask
+ localHistoryMask = (1 << local_history_bits) - 1;
+
+ //Setup the array of counters for the global predictor
+ global_ctrs = new SatCounter[global_predictor_size](global_ctr_bits);
+ //Clear the global history
+ global_history = 0;
+ // Setup the global history mask
+ globalHistoryMask = (1 << global_history_bits) - 1;
+
+ //Setup the array of counters for the choice predictor
+ choice_ctrs = new SatCounter[choice_predictor_size](choice_ctr_bits);
+
+ threshold = (1 << (local_ctr_bits - 1)) - 1;
+ threshold = threshold / 2;
+}
+
+inline
+unsigned
+TournamentBP::calcLocHistIdx(Addr &branch_addr)
+{
+ return (branch_addr >> instShiftAmt) & (local_history_table_size - 1);
+}
+
+inline
+void
+TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
+{
+ global_history = (global_history << 1) | 1;
+ global_history = global_history & globalHistoryMask;
+
+ local_history_table[local_history_idx] =
+ (local_history_table[local_history_idx] << 1) | 1;
+}
+
+inline
+void
+TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
+{
+ global_history = (global_history << 1);
+ global_history = global_history & globalHistoryMask;
+
+ local_history_table[local_history_idx] =
+ (local_history_table[local_history_idx] << 1);
+}
+
+bool
+TournamentBP::lookup(Addr &branch_addr)
+{
+ uint8_t local_prediction;
+ unsigned local_history_idx;
+ unsigned local_predictor_idx;
+
+ uint8_t global_prediction;
+ uint8_t choice_prediction;
+
+ //Lookup in the local predictor to get its branch prediction
+ local_history_idx = calcLocHistIdx(branch_addr);
+ local_predictor_idx = local_history_table[local_history_idx]
+ & localHistoryMask;
+ local_prediction = local_ctrs[local_predictor_idx].read();
+
+ //Lookup in the global predictor to get its branch prediction
+ global_prediction = global_ctrs[global_history].read();
+
+ //Lookup in the choice predictor to see which one to use
+ choice_prediction = choice_ctrs[global_history].read();
+
+ //@todo Put a threshold value in for the three predictors that can
+ // be set through the constructor (so this isn't hard coded).
+ //Also should put some of this code into functions.
+ if (choice_prediction > threshold) {
+ if (global_prediction > threshold) {
+ updateHistoriesTaken(local_history_idx);
+
+ assert(global_history < global_predictor_size &&
+ local_history_idx < local_predictor_size);
+
+ global_ctrs[global_history].increment();
+ local_ctrs[local_history_idx].increment();
+
+ return true;
+ } else {
+ updateHistoriesNotTaken(local_history_idx);
+
+ assert(global_history < global_predictor_size &&
+ local_history_idx < local_predictor_size);
+
+ global_ctrs[global_history].decrement();
+ local_ctrs[local_history_idx].decrement();
+
+ return false;
+ }
+ } else {
+ if (local_prediction > threshold) {
+ updateHistoriesTaken(local_history_idx);
+
+ assert(global_history < global_predictor_size &&
+ local_history_idx < local_predictor_size);
+
+ global_ctrs[global_history].increment();
+ local_ctrs[local_history_idx].increment();
+
+ return true;
+ } else {
+ updateHistoriesNotTaken(local_history_idx);
+
+ assert(global_history < global_predictor_size &&
+ local_history_idx < local_predictor_size);
+
+ global_ctrs[global_history].decrement();
+ local_ctrs[local_history_idx].decrement();
+
+ return false;
+ }
+ }
+}
+
+// Update the branch predictor if it predicted a branch wrong.
+void
+TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
+{
+
+ uint8_t local_prediction;
+ unsigned local_history_idx;
+ unsigned local_predictor_idx;
+ bool local_pred_taken;
+
+ uint8_t global_prediction;
+ bool global_pred_taken;
+
+ // Load the correct global history into the register.
+ global_history = correct_gh;
+
+ // Get the local predictor's current prediction, remove the incorrect
+ // update, and update the local predictor
+ local_history_idx = calcLocHistIdx(branch_addr);
+ local_predictor_idx = local_history_table[local_history_idx];
+ local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
+
+ local_prediction = local_ctrs[local_predictor_idx].read();
+ local_pred_taken = local_prediction > threshold;
+
+ //Get the global predictor's current prediction, and update the
+ //global predictor
+ global_prediction = global_ctrs[global_history].read();
+ global_pred_taken = global_prediction > threshold;
+
+ //Update the choice predictor to tell it which one was correct
+ if (local_pred_taken != global_pred_taken) {
+ //If the local prediction matches the actual outcome, decerement
+ //the counter. Otherwise increment the counter.
+ if (local_pred_taken == taken) {
+ choice_ctrs[global_history].decrement();
+ } else {
+ choice_ctrs[global_history].increment();
+ }
+ }
+
+ if (taken) {
+ assert(global_history < global_predictor_size &&
+ local_predictor_idx < local_predictor_size);
+
+ local_ctrs[local_predictor_idx].increment();
+ global_ctrs[global_history].increment();
+
+ global_history = (global_history << 1) | 1;
+ global_history = global_history & globalHistoryMask;
+
+ local_history_table[local_history_idx] |= 1;
+ }
+ else {
+ assert(global_history < global_predictor_size &&
+ local_predictor_idx < local_predictor_size);
+
+ local_ctrs[local_predictor_idx].decrement();
+ global_ctrs[global_history].decrement();
+
+ global_history = (global_history << 1);
+ global_history = global_history & globalHistoryMask;
+
+ local_history_table[local_history_idx] &= ~1;
+ }
+}
diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh
new file mode 100644
index 000000000..bf87d753b
--- /dev/null
+++ b/cpu/beta_cpu/tournament_pred.hh
@@ -0,0 +1,160 @@
+#ifndef __TOURNAMENT_PRED_HH__
+#define __TOURNAMENT_PRED_HH__
+
+// For Addr type.
+#include "arch/alpha/isa_traits.hh"
+
+class TournamentBP
+{
+ public:
+ /**
+ * Default branch predictor constructor.
+ */
+ TournamentBP(unsigned local_predictor_size,
+ unsigned local_ctr_bits,
+ unsigned local_history_table_size,
+ unsigned local_history_bits,
+ unsigned global_predictor_size,
+ unsigned global_history_bits,
+ unsigned global_ctr_bits,
+ unsigned choice_predictor_size,
+ unsigned choice_ctr_bits,
+ unsigned instShiftAmt);
+
+ /**
+ * Looks up the given address in the branch predictor and returns
+ * a true/false value as to whether it is taken.
+ * @param branch_addr The address of the branch to look up.
+ * @return Whether or not the branch is taken.
+ */
+ bool lookup(Addr &branch_addr);
+
+ /**
+ * Updates the branch predictor with the actual result of a branch.
+ * @param branch_addr The address of the branch to update.
+ * @param taken Whether or not the branch was taken.
+ */
+ void update(Addr &branch_addr, unsigned global_history, bool taken);
+
+ inline unsigned readGlobalHist() { return global_history; }
+
+ private:
+
+ inline bool getPrediction(uint8_t &count);
+
+ inline unsigned calcLocHistIdx(Addr &branch_addr);
+
+ inline void updateHistoriesTaken(unsigned local_history_idx);
+
+ inline void updateHistoriesNotTaken(unsigned local_history_idx);
+
+ /**
+ * Private counter class for the internal saturating counters.
+ * Implements an n bit saturating counter and provides methods to
+ * increment, decrement, and read it.
+ * @todo Consider making this something that more closely mimics a
+ * built in class so you can use ++ or --.
+ */
+ class SatCounter
+ {
+ public:
+ /**
+ * Constructor for the counter.
+ * @param bits How many bits the counter will have.
+ */
+ SatCounter(unsigned bits);
+
+ /**
+ * Constructor for the counter.
+ * @param bits How many bits the counter will have.
+ * @param initial_val Starting value for each counter.
+ */
+ SatCounter(unsigned bits, unsigned initial_val);
+
+ /**
+ * Increments the counter's current value.
+ */
+ void increment();
+
+ /**
+ * Decrements the counter's current value.
+ */
+ void decrement();
+
+ /**
+ * Read the counter's value.
+ */
+ uint8_t read()
+ {
+ return counter;
+ }
+
+ private:
+ uint8_t maxVal;
+ uint8_t counter;
+ };
+
+ /** Local counters. */
+ SatCounter *local_ctrs;
+
+ /** Size of the local predictor. */
+ unsigned local_predictor_size;
+
+ /** Number of bits of the local predictor's counters. */
+ unsigned local_ctr_bits;
+
+ /** Array of local history table entries. */
+ unsigned *local_history_table;
+
+ /** Size of the local history table. */
+ unsigned local_history_table_size;
+
+ /** Number of bits for each entry of the local history table.
+ * @todo Doesn't this come from the size of the local predictor?
+ */
+ unsigned local_history_bits;
+
+ /** Mask to get the proper local history. */
+ unsigned localHistoryMask;
+
+
+ /** Array of counters that make up the global predictor. */
+ SatCounter *global_ctrs;
+
+ /** Size of the global predictor. */
+ unsigned global_predictor_size;
+
+ /** Number of bits of the global predictor's counters. */
+ unsigned global_ctr_bits;
+
+ /** Global history register. */
+ unsigned global_history;
+
+ /** Number of bits for the global history. */
+ unsigned global_history_bits;
+
+ /** Mask to get the proper global history. */
+ unsigned globalHistoryMask;
+
+
+ /** Array of counters that make up the choice predictor. */
+ SatCounter *choice_ctrs;
+
+ /** Size of the choice predictor (identical to the global predictor). */
+ unsigned choice_predictor_size;
+
+ /** Number of bits of the choice predictor's counters. */
+ unsigned choice_ctr_bits;
+
+ /** Number of bits to shift the instruction over to get rid of the word
+ * offset.
+ */
+ unsigned instShiftAmt;
+
+ /** Threshold for the counter value; above the threshold is taken,
+ * equal to or below the threshold is not taken.
+ */
+ unsigned threshold;
+};
+
+#endif // __TOURNAMENT_PRED_HH__