summaryrefslogtreecommitdiff
path: root/src/cpu/ozone
diff options
context:
space:
mode:
authorGabe Black <gblack@eecs.umich.edu>2006-10-02 14:32:02 -0400
committerGabe Black <gblack@eecs.umich.edu>2006-10-02 14:32:02 -0400
commite8ced44aead3b1a11ac7747b8d38ce5dba6c09d1 (patch)
tree2453c596489fbe786e6da2e21e0b5791bcbeaf88 /src/cpu/ozone
parente62b734b23d03c50fcd31d833f417207e4520d29 (diff)
parentf2acc3a4a303e4cc054eef62621499ebcbf8599b (diff)
downloadgem5-e8ced44aead3b1a11ac7747b8d38ce5dba6c09d1.tar.xz
Merge zizzer.eecs.umich.edu:/bk/newmem
into zeep.eecs.umich.edu:/home/gblack/m5/newmem src/cpu/ozone/cpu_impl.hh: Hand merged --HG-- extra : convert_revision : f8a5b0205bcb78c8f5e109f456fe7bca80a7abac
Diffstat (limited to 'src/cpu/ozone')
-rw-r--r--src/cpu/ozone/checker_builder.cc8
-rw-r--r--src/cpu/ozone/cpu.hh37
-rw-r--r--src/cpu/ozone/cpu_builder.cc16
-rw-r--r--src/cpu/ozone/cpu_impl.hh120
-rw-r--r--src/cpu/ozone/front_end.hh7
-rw-r--r--src/cpu/ozone/front_end_impl.hh74
-rw-r--r--src/cpu/ozone/inorder_back_end_impl.hh2
-rw-r--r--src/cpu/ozone/inst_queue_impl.hh8
-rw-r--r--src/cpu/ozone/lw_back_end.hh94
-rw-r--r--src/cpu/ozone/lw_back_end_impl.hh317
-rw-r--r--src/cpu/ozone/lw_lsq.hh19
-rw-r--r--src/cpu/ozone/lw_lsq_impl.hh47
-rw-r--r--src/cpu/ozone/simple_params.hh4
-rw-r--r--src/cpu/ozone/thread_state.hh26
14 files changed, 529 insertions, 250 deletions
diff --git a/src/cpu/ozone/checker_builder.cc b/src/cpu/ozone/checker_builder.cc
index c372e51d6..b4c4686b7 100644
--- a/src/cpu/ozone/checker_builder.cc
+++ b/src/cpu/ozone/checker_builder.cc
@@ -65,6 +65,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+ Param<Tick> progress_interval;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
@@ -79,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
Param<bool> defer_registration;
Param<bool> exitOnError;
+ Param<bool> updateOnError;
Param<bool> warnOnlyOnLoadError;
Param<bool> function_trace;
Param<Tick> function_trace_start;
@@ -95,6 +97,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
+ INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
@@ -110,6 +113,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
INIT_PARAM(exitOnError, "exit on error"),
+ INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
"result errors", false),
INIT_PARAM(function_trace, "Enable function trace"),
@@ -128,6 +132,7 @@ CREATE_SIM_OBJECT(OzoneChecker)
params->max_loads_any_thread = 0;
params->max_loads_all_threads = 0;
params->exitOnError = exitOnError;
+ params->updateOnError = updateOnError;
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
params->deferRegistration = defer_registration;
params->functionTrace = function_trace;
@@ -140,6 +145,9 @@ CREATE_SIM_OBJECT(OzoneChecker)
temp = max_insts_all_threads;
temp = max_loads_any_thread;
temp = max_loads_all_threads;
+ Tick temp2 = progress_interval;
+ temp2++;
+ params->progress_interval = 0;
#if FULL_SYSTEM
params->itb = itb;
diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh
index dd63d2e00..8c5be9424 100644
--- a/src/cpu/ozone/cpu.hh
+++ b/src/cpu/ozone/cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
#include <set>
+#include "arch/regfile.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "config/full_system.hh"
@@ -81,13 +82,13 @@ template <class>
class Checker;
/**
- * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
- * simple out-of-order capabilities added to it. It is still a 1 CPI machine
- * (?), but is capable of handling cache misses. Basically it models having
- * a ROB/IQ by only allowing a certain amount of instructions to execute while
- * the cache miss is outstanding.
+ * Light weight out of order CPU model that approximates an out of
+ * order CPU. It is separated into a front end and a back end, with
+ * the template parameter Impl describing the classes used for each.
+ * The goal is to be able to specify through the Impl the class to use
+ * for the front end and back end, with different classes used to
+ * model different levels of detail.
*/
-
template <class Impl>
class OzoneCPU : public BaseCPU
{
@@ -273,6 +274,7 @@ class OzoneCPU : public BaseCPU
typedef OzoneThreadState<Impl> ImplState;
private:
+ // Committed thread state for the OzoneCPU.
OzoneThreadState<Impl> thread;
public:
@@ -310,12 +312,6 @@ class OzoneCPU : public BaseCPU
tickEvent.squash();
}
- private:
- Trace::InstRecord *traceData;
-
- template<typename T>
- void trace_data(T data);
-
public:
enum Status {
Running,
@@ -326,8 +322,6 @@ class OzoneCPU : public BaseCPU
Status _status;
public:
- bool checkInterrupts;
-
void post_interrupt(int int_num, int index);
void zero_fill_64(Addr addr) {
@@ -379,6 +373,7 @@ class OzoneCPU : public BaseCPU
FrontEnd *frontEnd;
BackEnd *backEnd;
+
private:
Status status() const { return _status; }
void setStatus(Status new_status) { _status = new_status; }
@@ -410,12 +405,11 @@ class OzoneCPU : public BaseCPU
// number of idle cycles
Stats::Average<> notIdleFraction;
Stats::Formula idleFraction;
- public:
+ public:
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
-
#if FULL_SYSTEM
/** Translates instruction requestion. */
Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
@@ -582,12 +576,9 @@ class OzoneCPU : public BaseCPU
Fault copy(Addr dest);
- InstSeqNum globalSeqNum;
-
public:
void squashFromTC();
- // @todo: This can be a useful debug function. Implement it.
void dumpInsts() { frontEnd->dumpInsts(); }
#if FULL_SYSTEM
@@ -605,7 +596,6 @@ class OzoneCPU : public BaseCPU
ThreadContext *tcBase() { return tc; }
- bool decoupledFrontEnd;
struct CommStruct {
InstSeqNum doneSeqNum;
InstSeqNum nonSpecSeqNum;
@@ -614,8 +604,13 @@ class OzoneCPU : public BaseCPU
bool stall;
};
+
+ InstSeqNum globalSeqNum;
+
TimeBuffer<CommStruct> comm;
+ bool decoupledFrontEnd;
+
bool lockFlag;
Stats::Scalar<> quiesceCycles;
diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc
index e239b7a94..730158258 100644
--- a/src/cpu/ozone/cpu_builder.cc
+++ b/src/cpu/ozone/cpu_builder.cc
@@ -63,6 +63,7 @@ SimObjectParam<System *> system;
Param<int> cpu_id;
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
+Param<Tick> profile;
#else
SimObjectVectorParam<Process *> workload;
//SimObjectParam<PageTable *> page_table;
@@ -76,16 +77,18 @@ Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+Param<Tick> progress_interval;
//SimObjectParam<BaseCache *> icache;
//SimObjectParam<BaseCache *> dcache;
Param<unsigned> cachePorts;
Param<unsigned> width;
+Param<unsigned> frontEndLatency;
Param<unsigned> frontEndWidth;
+Param<unsigned> backEndLatency;
Param<unsigned> backEndWidth;
Param<unsigned> backEndSquashLatency;
-Param<unsigned> backEndLatency;
Param<unsigned> maxInstBufferSize;
Param<unsigned> numPhysicalRegs;
Param<unsigned> maxOutstandingMemOps;
@@ -140,6 +143,7 @@ Param<unsigned> RASSize;
Param<unsigned> LQEntries;
Param<unsigned> SQEntries;
+Param<bool> lsqLimits;
Param<unsigned> LFSTSize;
Param<unsigned> SSITSize;
@@ -181,6 +185,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
+ INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "Processes to run"),
// INIT_PARAM(page_table, "Page table"),
@@ -204,16 +209,18 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
"Terminate when all threads have reached this load"
"count",
0),
+ INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
INIT_PARAM_DFLT(width, "Width", 1),
+ INIT_PARAM_DFLT(frontEndLatency, "Front end latency", 1),
INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+ INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
- INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
@@ -274,6 +281,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM(LQEntries, "Number of load queue entries"),
INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM_DFLT(lsqLimits, "LSQ size limits dispatch", true),
INIT_PARAM(LFSTSize, "Last fetched store table size"),
INIT_PARAM(SSITSize, "Store set ID table size"),
@@ -336,6 +344,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
+ params->profile = profile;
#else
params->workload = workload;
// params->pTable = page_table;
@@ -347,6 +356,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
+ params->progress_interval = progress_interval;
//
// Caches
@@ -357,6 +367,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->width = width;
params->frontEndWidth = frontEndWidth;
+ params->frontEndLatency = frontEndLatency;
params->backEndWidth = backEndWidth;
params->backEndSquashLatency = backEndSquashLatency;
params->backEndLatency = backEndLatency;
@@ -414,6 +425,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->LQEntries = LQEntries;
params->SQEntries = SQEntries;
+ params->lsqLimits = lsqLimits;
params->SSITSize = SSITSize;
params->LFSTSize = LFSTSize;
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index 45e14b9de..bf547bf94 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -35,7 +35,7 @@
#include "arch/isa_traits.hh" // For MachInst
#include "base/trace.hh"
#include "cpu/base.hh"
-#include "cpu/checker/cpu.hh"
+#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
#include "cpu/exetrace.hh"
#include "cpu/ozone/cpu.hh"
@@ -51,7 +51,6 @@
#include "arch/alpha/types.hh"
#include "arch/vtophys.hh"
#include "base/callback.hh"
-//#include "base/remote_gdb.hh"
#include "cpu/profile.hh"
#include "kern/kernel_stats.hh"
#include "mem/physical.hh"
@@ -70,15 +69,6 @@
using namespace TheISA;
template <class Impl>
-template<typename T>
-void
-OzoneCPU<Impl>::trace_data(T data) {
- if (traceData) {
- traceData->setData(data);
- }
-}
-
-template <class Impl>
OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
: Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
{
@@ -128,6 +118,8 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
panic("Checker enabled but not compiled in!");
#endif
} else {
+ // If checker is not being used, then the xcProxy points
+ // directly to the CPU's ExecContext.
checker = NULL;
thread.tc = &ozoneTC;
tc = &ozoneTC;
@@ -140,7 +132,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.setStatus(ThreadContext::Suspended);
#if FULL_SYSTEM
- /***** All thread state stuff *****/
+ // Setup thread state stuff.
thread.cpu = this;
thread.setTid(0);
@@ -189,12 +181,15 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
frontEnd->setBackEnd(backEnd);
backEnd->setFrontEnd(frontEnd);
- decoupledFrontEnd = p->decoupledFrontEnd;
-
globalSeqNum = 1;
+#if FULL_SYSTEM
checkInterrupts = false;
+#endif
+ lockFlag = 0;
+
+ // Setup rename table, initializing all values to ready.
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
thread.renameTable[i] = new DynInst(this);
thread.renameTable[i]->setResultReady();
@@ -235,8 +230,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.setVirtPort(virt_port);
#endif
- lockFlag = 0;
-
DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
}
@@ -249,6 +242,7 @@ template <class Impl>
void
OzoneCPU<Impl>::switchOut()
{
+ BaseCPU::switchOut();
switchCount = 0;
// Front end needs state from back end, so switch out the back end first.
backEnd->switchOut();
@@ -259,6 +253,8 @@ template <class Impl>
void
OzoneCPU<Impl>::signalSwitched()
{
+ // Only complete the switchout when both the front end and back
+ // end have signalled they are ready to switch.
if (++switchCount == 2) {
backEnd->doSwitchOut();
frontEnd->doSwitchOut();
@@ -268,6 +264,17 @@ OzoneCPU<Impl>::signalSwitched()
#endif
_status = SwitchedOut;
+#ifndef NDEBUG
+ // Loop through all registers
+ for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i] == frontEnd->renameTable[i]);
+
+ assert(thread.renameTable[i] == backEnd->renameTable[i]);
+
+ DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
+ }
+#endif
+
if (tickEvent.scheduled())
tickEvent.squash();
}
@@ -280,13 +287,25 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
BaseCPU::takeOverFrom(oldCPU);
+ thread.trapPending = false;
+ thread.inSyscall = false;
+
backEnd->takeOverFrom();
frontEnd->takeOverFrom();
+ frontEnd->renameTable.copyFrom(thread.renameTable);
+ backEnd->renameTable.copyFrom(thread.renameTable);
assert(!tickEvent.scheduled());
+#ifndef NDEBUG
+ // Check rename table.
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i]->isResultReady());
+ }
+#endif
+
// @todo: Fix hardcoded number
// Clear out any old information in time buffer.
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < 15; ++i) {
comm.advance();
}
@@ -318,6 +337,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
notIdleFraction++;
scheduleTickEvent(delay);
_status = Running;
+#if FULL_SYSTEM
+ if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
+ thread.quiesceEvent->deschedule();
+#endif
thread.setStatus(ThreadContext::Active);
frontEnd->wakeFromQuiesce();
}
@@ -395,7 +418,7 @@ template <class Impl>
void
OzoneCPU<Impl>::resetStats()
{
- startNumInst = numInst;
+// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@@ -443,6 +466,15 @@ OzoneCPU<Impl>::serialize(std::ostream &os)
ozoneTC.serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ nameOut(os, csprintf("%s.xc.0", name()));
+ temp.copyTC(thread.getTC());
+ temp.serialize(os);
}
template <class Impl>
@@ -453,6 +485,15 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
UNSERIALIZE_ENUM(_status);
ozoneTC.unserialize(cp, csprintf("%s.tc", section));
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ temp.copyTC(thread.getTC());
+ temp.unserialize(cp, csprintf("%s.xc.0", section));
+ thread.getTC()->copyArchRegs(temp.getTC());
}
template <class Impl>
@@ -707,11 +748,13 @@ OzoneCPU<Impl>::processInterrupts()
if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
thread.setMiscReg(IPR_ISR, summary);
thread.setMiscReg(IPR_INTID, ipl);
+#if USE_CHECKER
// @todo: Make this more transparent
if (checker) {
checker->threadBase()->setMiscReg(IPR_ISR, summary);
checker->threadBase()->setMiscReg(IPR_INTID, ipl);
}
+#endif
Fault fault = new InterruptFault;
fault->invoke(thread.getTC());
DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
@@ -812,7 +855,9 @@ OzoneCPU<Impl>::OzoneTC::halt()
template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::dumpFuncProfile()
-{ }
+{
+ thread->dumpFuncProfile();
+}
#endif
template <class Impl>
@@ -831,6 +876,7 @@ OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
+ thread->setInst(old_context->getInst());
#if !FULL_SYSTEM
setFuncExeInst(old_context->readFuncExeInst());
#else
@@ -844,6 +890,7 @@ OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
thread->quiesceEvent->tc = this;
}
+ // Copy kernel stats pointer from old context.
thread->kernelStats = old_context->getKernelStats();
// storeCondFailures = 0;
cpu->lockFlag = false;
@@ -865,7 +912,11 @@ OzoneCPU<Impl>::OzoneTC::regStats(const std::string &name)
template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::serialize(std::ostream &os)
-{ }
+{
+ // Once serialization is added, serialize the quiesce event and
+ // kernel stats. Will need to make sure there aren't multiple
+ // things that serialize them.
+}
template <class Impl>
void
@@ -898,16 +949,14 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::profileClear()
{
- if (thread->profile)
- thread->profile->clear();
+ thread->profileClear();
}
template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::profileSample()
{
- if (thread->profile)
- thread->profile->sample(thread->profileNode, thread->profilePC);
+ thread->profileSample();
}
#endif
@@ -918,7 +967,6 @@ OzoneCPU<Impl>::OzoneTC::getThreadNum()
return thread->readTid();
}
-// Also somewhat obnoxious. Really only used for the TLB fault.
template <class Impl>
TheISA::MachInst
OzoneCPU<Impl>::OzoneTC::getInst()
@@ -936,14 +984,20 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc)
cpu->frontEnd->setPC(thread->PC);
cpu->frontEnd->setNextPC(thread->nextPC);
- for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
- if (i < TheISA::FP_Base_DepTag) {
- thread->renameTable[i]->setIntResult(tc->readIntReg(i));
- } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
- int fp_idx = i - TheISA::FP_Base_DepTag;
- thread->renameTable[i]->setDoubleResult(
- tc->readFloatReg(fp_idx, 64));
- }
+ // First loop through the integer registers.
+ for (int i = 0; i < TheISA::NumIntRegs; ++i) {
+/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
+ "now has data %lli.\n",
+ i, thread->renameTable[i]->readIntResult(),
+ tc->readIntReg(i));
+*/
+ thread->renameTable[i]->setIntResult(tc->readIntReg(i));
+ }
+
+ // Then loop through the floating point registers.
+ for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
+ int fp_idx = i + TheISA::FP_Base_DepTag;
+ thread->renameTable[fp_idx]->setIntResult(tc->readFloatRegBits(i));
}
#if !FULL_SYSTEM
diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh
index 3ed3c4d18..5ffd3666e 100644
--- a/src/cpu/ozone/front_end.hh
+++ b/src/cpu/ozone/front_end.hh
@@ -34,6 +34,7 @@
#include <deque>
#include "arch/utility.hh"
+#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/rename_table.hh"
@@ -246,15 +247,21 @@ class FrontEnd
void dumpInsts();
private:
+ TimeBuffer<int> numInstsReady;
+
typedef typename std::deque<DynInstPtr> InstBuff;
typedef typename InstBuff::iterator InstBuffIt;
+ InstBuff feBuffer;
+
InstBuff instBuffer;
int instBufferSize;
int maxInstBufferSize;
+ int latency;
+
int width;
int freeRegs;
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index 8a15378a7..9a00aefbf 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -92,8 +92,10 @@ FrontEnd<Impl>::FrontEnd(Params *params)
: branchPred(params),
icachePort(this),
mem(params->mem),
+ numInstsReady(params->frontEndLatency, 0),
instBufferSize(0),
maxInstBufferSize(params->maxInstBufferSize),
+ latency(params->frontEndLatency),
width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs),
@@ -326,6 +328,18 @@ FrontEnd<Impl>::tick()
if (switchedOut)
return;
+ for (int insts_to_queue = numInstsReady[-latency];
+ !instBuffer.empty() && insts_to_queue;
+ --insts_to_queue)
+ {
+ DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
+ instBuffer.front()->seqNum);
+ feBuffer.push_back(instBuffer.front());
+ instBuffer.pop_front();
+ }
+
+ numInstsReady.advance();
+
// @todo: Maybe I want to just have direct communication...
if (fromCommit->doneSeqNum) {
branchPred.update(fromCommit->doneSeqNum, 0);
@@ -339,8 +353,8 @@ FrontEnd<Impl>::tick()
cacheBlkValid = true;
status = Running;
- if (barrierInst)
- status = SerializeBlocked;
+// if (barrierInst)
+// status = SerializeBlocked;
if (freeRegs <= 0)
status = RenameBlocked;
checkBE();
@@ -414,11 +428,12 @@ FrontEnd<Impl>::tick()
// latency
instBuffer.push_back(inst);
++instBufferSize;
+ numInstsReady[0]++;
++num_inst;
#if FULL_SYSTEM
if (inst->isQuiesce()) {
- warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
+// warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
status = QuiescePending;
break;
}
@@ -572,10 +587,10 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
// Change status over to SerializeBlocked so that other stages know
// what this is blocked on.
- status = SerializeBlocked;
+// status = SerializeBlocked;
- barrierInst = inst;
- return true;
+// barrierInst = inst;
+// return true;
} else if ((inst->isStoreConditional() || inst->isSerializeAfter())
&& !inst->isSerializeHandled()) {
DPRINTF(FE, "Serialize after instruction encountered.\n");
@@ -620,6 +635,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
instruction->fault = fault;
instruction->setCanIssue();
instBuffer.push_back(instruction);
+ numInstsReady[0]++;
++instBufferSize;
}
@@ -649,6 +665,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
freeRegs+= inst->numDestRegs();
}
+ while (!feBuffer.empty() &&
+ feBuffer.back()->seqNum > squash_num) {
+ DynInstPtr inst = feBuffer.back();
+
+ DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+ inst->seqNum, inst->readPC());
+
+ inst->clearDependents();
+
+ feBuffer.pop_back();
+ --instBufferSize;
+
+ freeRegs+= inst->numDestRegs();
+ }
+
// Copy over rename table from the back end.
renameTable.copyFrom(backEnd->renameTable);
@@ -666,12 +697,12 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
DPRINTF(FE, "Squashing outstanding Icache access.\n");
memReq = NULL;
}
-
+/*
if (status == SerializeBlocked) {
assert(barrierInst->seqNum > squash_num);
barrierInst = NULL;
}
-
+*/
// Unless this squash originated from the front end, we're probably
// in running mode now.
// Actually might want to make this latency dependent.
@@ -683,13 +714,22 @@ template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInst()
{
- if (instBufferSize == 0) {
+ if (feBuffer.empty()) {
return NULL;
}
- DynInstPtr inst = instBuffer.front();
+ DynInstPtr inst = feBuffer.front();
- instBuffer.pop_front();
+ if (inst->isSerializeBefore() || inst->isIprAccess()) {
+ DPRINTF(FE, "Back end is getting a serialize before inst\n");
+ if (!backEnd->robEmpty()) {
+ DPRINTF(FE, "Rob is not empty yet, not returning inst\n");
+ return NULL;
+ }
+ inst->clearSerializeBefore();
+ }
+
+ feBuffer.pop_front();
--instBufferSize;
@@ -784,11 +824,11 @@ FrontEnd<Impl>::updateStatus()
}
if (status == BEBlocked && !be_block) {
- if (barrierInst) {
- status = SerializeBlocked;
- } else {
+// if (barrierInst) {
+// status = SerializeBlocked;
+// } else {
status = Running;
- }
+// }
ret_val = true;
}
return ret_val;
@@ -810,6 +850,7 @@ template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInstFromCacheline()
{
+/*
if (status == SerializeComplete) {
DynInstPtr inst = barrierInst;
status = Running;
@@ -817,7 +858,7 @@ FrontEnd<Impl>::getInstFromCacheline()
inst->clearSerializeBefore();
return inst;
}
-
+*/
InstSeqNum inst_seq;
MachInst inst;
// @todo: Fix this magic number used here to handle word offset (and
@@ -932,6 +973,7 @@ FrontEnd<Impl>::doSwitchOut()
squash(0, 0);
instBuffer.clear();
instBufferSize = 0;
+ feBuffer.clear();
status = Idle;
}
diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh
index 701fc0ee9..16ebac163 100644
--- a/src/cpu/ozone/inorder_back_end_impl.hh
+++ b/src/cpu/ozone/inorder_back_end_impl.hh
@@ -284,7 +284,7 @@ InorderBackEnd<Impl>::executeInsts()
}
inst->setExecuted();
- inst->setCompleted();
+ inst->setResultReady();
inst->setCanCommit();
instList.pop_front();
diff --git a/src/cpu/ozone/inst_queue_impl.hh b/src/cpu/ozone/inst_queue_impl.hh
index f2d80e621..32a940241 100644
--- a/src/cpu/ozone/inst_queue_impl.hh
+++ b/src/cpu/ozone/inst_queue_impl.hh
@@ -850,13 +850,13 @@ template <class Impl>
void
InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
{
- OpClass op_class = ready_inst->opClass();
+// OpClass op_class = ready_inst->opClass();
readyInsts.push(ready_inst);
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
- ready_inst->readPC(), op_class, ready_inst->seqNum);
+ ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum);
}
/*
template <class Impl>
@@ -1177,11 +1177,11 @@ InstQueue<Impl>::addIfReady(DynInstPtr &inst)
return;
}
- OpClass op_class = inst->opClass();
+// OpClass op_class = inst->opClass();
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
- inst->readPC(), op_class, inst->seqNum);
+ inst->readPC(), inst->opClass(), inst->seqNum);
readyInsts.push(inst);
}
diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh
index d836ceebd..49c6a1ae2 100644
--- a/src/cpu/ozone/lw_back_end.hh
+++ b/src/cpu/ozone/lw_back_end.hh
@@ -80,7 +80,7 @@ class LWBackEnd
TimeBuffer<IssueToExec> i2e;
typename TimeBuffer<IssueToExec>::wire instsToExecute;
TimeBuffer<ExecToCommit> e2c;
- TimeBuffer<Writeback> numInstsToWB;
+ TimeBuffer<int> numInstsToWB;
TimeBuffer<CommStruct> *comm;
typename TimeBuffer<CommStruct>::wire toIEW;
@@ -139,7 +139,7 @@ class LWBackEnd
Tick lastCommitCycle;
- bool robEmpty() { return instList.empty(); }
+ bool robEmpty() { return numInsts == 0; }
bool isFull() { return numInsts >= numROBEntries; }
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
@@ -194,6 +194,7 @@ class LWBackEnd
}
void instToCommit(DynInstPtr &inst);
+ void readyInstsForCommit();
void switchOut();
void doSwitchOut();
@@ -255,12 +256,13 @@ class LWBackEnd
RenameTable<Impl> renameTable;
private:
+ int latency;
+
// General back end width. Used if the more specific isn't given.
int width;
// Dispatch width.
int dispatchWidth;
- int numDispatchEntries;
int dispatchSize;
int waitingInsts;
@@ -285,6 +287,7 @@ class LWBackEnd
int numROBEntries;
int numInsts;
+ bool lsqLimits;
std::set<InstSeqNum> waitingMemOps;
typedef std::set<InstSeqNum>::iterator MemIt;
@@ -295,9 +298,6 @@ class LWBackEnd
InstSeqNum squashSeqNum;
Addr squashNextPC;
- Fault faultFromFetch;
- bool fetchHasFault;
-
bool switchedOut;
bool switchPending;
@@ -321,8 +321,6 @@ class LWBackEnd
std::list<DynInstPtr> replayList;
std::list<DynInstPtr> writeback;
- int latency;
-
int squashLatency;
bool exactFullStall;
@@ -331,37 +329,39 @@ class LWBackEnd
/* Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
*/
- Stats::Vector<> rob_cap_events;
- Stats::Vector<> rob_cap_inst_count;
- Stats::Vector<> iq_cap_events;
- Stats::Vector<> iq_cap_inst_count;
+ Stats::Vector<> robCapEvents;
+ Stats::Vector<> robCapInstCount;
+ Stats::Vector<> iqCapEvents;
+ Stats::Vector<> iqCapInstCount;
// total number of instructions executed
- Stats::Vector<> exe_inst;
- Stats::Vector<> exe_swp;
- Stats::Vector<> exe_nop;
- Stats::Vector<> exe_refs;
- Stats::Vector<> exe_loads;
- Stats::Vector<> exe_branches;
+ Stats::Vector<> exeInst;
+ Stats::Vector<> exeSwp;
+ Stats::Vector<> exeNop;
+ Stats::Vector<> exeRefs;
+ Stats::Vector<> exeLoads;
+ Stats::Vector<> exeBranches;
- Stats::Vector<> issued_ops;
+ Stats::Vector<> issuedOps;
// total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
+ Stats::Vector<> lsqForwLoads;
// total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
+ Stats::Vector<> invAddrLoads;
// total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
+ Stats::Vector<> invAddrSwpfs;
// ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
+ Stats::Vector<> lsqBlockedLoads;
Stats::Scalar<> lsqInversion;
- Stats::Vector<> n_issued_dist;
- Stats::VectorDistribution<> issue_delay_dist;
+ Stats::Vector<> nIssuedDist;
+/*
+ Stats::VectorDistribution<> issueDelayDist;
- Stats::VectorDistribution<> queue_res_dist;
+ Stats::VectorDistribution<> queueResDist;
+*/
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
@@ -379,37 +379,37 @@ class LWBackEnd
Stats::Formula commit_ipb;
Stats::Formula lsq_inv_rate;
*/
- Stats::Vector<> writeback_count;
- Stats::Vector<> producer_inst;
- Stats::Vector<> consumer_inst;
- Stats::Vector<> wb_penalized;
+ Stats::Vector<> writebackCount;
+ Stats::Vector<> producerInst;
+ Stats::Vector<> consumerInst;
+ Stats::Vector<> wbPenalized;
- Stats::Formula wb_rate;
- Stats::Formula wb_fanout;
- Stats::Formula wb_penalized_rate;
+ Stats::Formula wbRate;
+ Stats::Formula wbFanout;
+ Stats::Formula wbPenalizedRate;
// total number of instructions committed
- Stats::Vector<> stat_com_inst;
- Stats::Vector<> stat_com_swp;
- Stats::Vector<> stat_com_refs;
- Stats::Vector<> stat_com_loads;
- Stats::Vector<> stat_com_membars;
- Stats::Vector<> stat_com_branches;
+ Stats::Vector<> statComInst;
+ Stats::Vector<> statComSwp;
+ Stats::Vector<> statComRefs;
+ Stats::Vector<> statComLoads;
+ Stats::Vector<> statComMembars;
+ Stats::Vector<> statComBranches;
- Stats::Distribution<> n_committed_dist;
+ Stats::Distribution<> nCommittedDist;
- Stats::Scalar<> commit_eligible_samples;
- Stats::Vector<> commit_eligible;
+ Stats::Scalar<> commitEligibleSamples;
+ Stats::Vector<> commitEligible;
Stats::Vector<> squashedInsts;
Stats::Vector<> ROBSquashedInsts;
- Stats::Scalar<> ROB_fcount;
- Stats::Formula ROB_full_rate;
+ Stats::Scalar<> ROBFcount;
+ Stats::Formula ROBFullRate;
- Stats::Vector<> ROB_count; // cumulative ROB occupancy
- Stats::Formula ROB_occ_rate;
- Stats::VectorDistribution<> ROB_occ_dist;
+ Stats::Vector<> ROBCount; // cumulative ROB occupancy
+ Stats::Formula ROBOccRate;
+// Stats::VectorDistribution<> ROBOccDist;
public:
void dumpInsts();
diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh
index a4f1d805e..c39b9e08b 100644
--- a/src/cpu/ozone/lw_back_end_impl.hh
+++ b/src/cpu/ozone/lw_back_end_impl.hh
@@ -141,13 +141,14 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
trapSquash(false), tcSquash(false),
- width(params->backEndWidth), exactFullStall(true)
+ latency(params->backEndLatency),
+ width(params->backEndWidth), lsqLimits(params->lsqLimits),
+ exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
- numDispatchEntries = 32;
maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
@@ -184,78 +185,79 @@ void
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
- rob_cap_events
+ LSQ.regStats();
+
+ robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("number of cycles where ROB cap was active")
.flags(total)
;
- rob_cap_inst_count
+ robCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_inst")
.desc("number of instructions held up by ROB cap")
.flags(total)
;
- iq_cap_events
+ iqCapEvents
.init(cpu->number_of_threads)
.name(name() +".IQ:cap_events" )
.desc("number of cycles where IQ cap was active")
.flags(total)
;
- iq_cap_inst_count
+ iqCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".IQ:cap_inst")
.desc("number of instructions held up by IQ cap")
.flags(total)
;
-
- exe_inst
+ exeInst
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:count")
.desc("number of insts issued")
.flags(total)
;
- exe_swp
+ exeSwp
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:swp")
.desc("number of swp insts issued")
.flags(total)
;
- exe_nop
+ exeNop
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:nop")
.desc("number of nop insts issued")
.flags(total)
;
- exe_refs
+ exeRefs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:refs")
.desc("number of memory reference insts issued")
.flags(total)
;
- exe_loads
+ exeLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:loads")
.desc("number of load insts issued")
.flags(total)
;
- exe_branches
+ exeBranches
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:branches")
.desc("Number of branches issued")
.flags(total)
;
- issued_ops
+ issuedOps
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:op_count")
.desc("number of insts issued")
@@ -272,28 +274,28 @@ LWBackEnd<Impl>::regStats()
//
// Other stats
//
- lsq_forw_loads
+ lsqForwLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:forw_loads")
.desc("number of loads forwarded via LSQ")
.flags(total)
;
- inv_addr_loads
+ invAddrLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_loads")
.desc("number of invalid-address loads")
.flags(total)
;
- inv_addr_swpfs
+ invAddrSwpfs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_swpfs")
.desc("number of invalid-address SW prefetches")
.flags(total)
;
- lsq_blocked_loads
+ lsqBlockedLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:blocked_loads")
.desc("number of ready loads not issued due to memory disambiguation")
@@ -305,51 +307,52 @@ LWBackEnd<Impl>::regStats()
.desc("Number of times LSQ instruction issued early")
;
- n_issued_dist
+ nIssuedDist
.init(issueWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
- issue_delay_dist
+/*
+ issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
- queue_res_dist
+ queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
- queue_res_dist.subname(i, opClassStrings[i]);
+ queueResDist.subname(i, opClassStrings[i]);
}
-
- writeback_count
+*/
+ writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
- producer_inst
+ producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
- consumer_inst
+ consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
- wb_penalized
+ wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
@@ -357,71 +360,71 @@ LWBackEnd<Impl>::regStats()
;
- wb_penalized_rate
+ wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
- wb_penalized_rate = wb_penalized / writeback_count;
+ wbPenalizedRate = wbPenalized / writebackCount;
- wb_fanout
+ wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
- wb_fanout = producer_inst / consumer_inst;
+ wbFanout = producerInst / consumerInst;
- wb_rate
+ wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
- wb_rate = writeback_count / cpu->numCycles;
+ wbRate = writebackCount / cpu->numCycles;
- stat_com_inst
+ statComInst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
- stat_com_swp
+ statComSwp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
- stat_com_refs
+ statComRefs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
- stat_com_loads
+ statComLoads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
- stat_com_membars
+ statComMembars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
- stat_com_branches
+ statComBranches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
- n_committed_dist
+ nCommittedDist
.init(0,commitWidth,1)
.name(name() + ".COM:committed_per_cycle")
.desc("Number of insts commited each cycle")
@@ -441,14 +444,14 @@ LWBackEnd<Impl>::regStats()
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
- commit_eligible
+ commitEligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
- commit_eligible_samples
+ commitEligibleSamples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
@@ -465,37 +468,38 @@ LWBackEnd<Impl>::regStats()
.desc("Number of instructions removed from inst list when they reached the head of the ROB")
;
- ROB_fcount
+ ROBFcount
.name(name() + ".ROB:full_count")
.desc("number of cycles where ROB was full")
;
- ROB_count
+ ROBCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:occupancy")
.desc(name() + ".ROB occupancy (cumulative)")
.flags(total)
;
- ROB_full_rate
+ ROBFullRate
.name(name() + ".ROB:full_rate")
.desc("ROB full per cycle")
;
- ROB_full_rate = ROB_fcount / cpu->numCycles;
+ ROBFullRate = ROBFcount / cpu->numCycles;
- ROB_occ_rate
+ ROBOccRate
.name(name() + ".ROB:occ_rate")
.desc("ROB occupancy rate")
.flags(total)
;
- ROB_occ_rate = ROB_count / cpu->numCycles;
-
- ROB_occ_dist
+ ROBOccRate = ROBCount / cpu->numCycles;
+/*
+ ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
.flags(total | cdf)
;
+*/
}
template <class Impl>
@@ -588,17 +592,21 @@ LWBackEnd<Impl>::tick()
{
DPRINTF(BE, "Ticking back end\n");
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
cpu->signalSwitched();
return;
}
- ROB_count[0]+= numInsts;
+ readyInstsForCommit();
- wbCycle = 0;
+ numInstsToWB.advance();
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
+ ROBCount[0]+= numInsts;
+
+ wbCycle = 0;
#if FULL_SYSTEM
checkInterrupts();
@@ -674,6 +682,10 @@ LWBackEnd<Impl>::dispatchInsts()
while (numInsts < numROBEntries &&
numWaitingMemOps < maxOutstandingMemOps) {
// Get instruction from front of time buffer
+ if (lsqLimits && LSQ.isFull()) {
+ break;
+ }
+
DynInstPtr inst = frontEnd->getInst();
if (!inst) {
break;
@@ -732,6 +744,7 @@ LWBackEnd<Impl>::dispatchInsts()
inst->setIssued();
inst->setExecuted();
inst->setCanCommit();
+ numInstsToWB[0]++;
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
"exeList.\n",
@@ -866,8 +879,17 @@ LWBackEnd<Impl>::executeInsts()
if (inst->isLoad()) {
LSQ.executeLoad(inst);
} else if (inst->isStore()) {
- LSQ.executeStore(inst);
- if (inst->req && !(inst->req->getFlags() & LOCKED)) {
+ Fault fault = LSQ.executeStore(inst);
+
+ if (!inst->isStoreConditional() && fault == NoFault) {
+ inst->setExecuted();
+
+ instToCommit(inst);
+ } else if (fault != NoFault) {
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
inst->setExecuted();
instToCommit(inst);
@@ -908,36 +930,54 @@ LWBackEnd<Impl>::executeInsts()
}
}
- issued_ops[0]+= num_executed;
- n_issued_dist[num_executed]++;
+ issuedOps[0]+= num_executed;
+ nIssuedDist[num_executed]++;
}
template<class Impl>
void
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
-
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
if (inst->isExecuted()) {
inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
- producer_inst[0]++;
- consumer_inst[0]+= dependents;
+ producerInst[0]++;
+ consumerInst[0]+= dependents;
}
}
}
- writeback_count[0]++;
+ writeback.push_back(inst);
+
+ numInstsToWB[0]++;
+
+ writebackCount[0]++;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+ for (int i = numInstsToWB[-latency];
+ !writeback.empty() && i;
+ --i)
+ {
+ DynInstPtr inst = writeback.front();
+ writeback.pop_front();
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ }
+ }
}
+
#if 0
template <class Impl>
void
@@ -1010,7 +1050,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// or store inst. Signal backwards that it should be executed.
if (!inst->isExecuted()) {
if (inst->isNonSpeculative() ||
- inst->isStoreConditional() ||
+ (inst->isStoreConditional() && inst->getFault() == NoFault) ||
inst->isMemBarrier() ||
inst->isWriteBarrier()) {
#if !FULL_SYSTEM
@@ -1151,6 +1191,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
++freed_regs;
}
+#if FULL_SYSTEM
+ if (thread->profile) {
+// bool usermode =
+// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+// thread->profilePC = usermode ? 1 : inst->readPC();
+ thread->profilePC = inst->readPC();
+ ProfileNode *node = thread->profile->consume(thread->getTC(),
+ inst->staticInst);
+
+ if (node)
+ thread->profileNode = node;
+ }
+#endif
+
if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst);
@@ -1158,6 +1212,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
inst->traceData = NULL;
}
+ if (inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
inst->clearDependents();
frontEnd->addFreeRegs(freed_regs);
@@ -1207,9 +1264,9 @@ LWBackEnd<Impl>::commitInsts()
while (!instList.empty() && inst_num < commitWidth) {
if (instList.back()->isSquashed()) {
instList.back()->clearDependents();
+ ROBSquashedInsts[instList.back()->threadNumber]++;
instList.pop_back();
--numInsts;
- ROBSquashedInsts[instList.back()->threadNumber]++;
continue;
}
@@ -1221,7 +1278,7 @@ LWBackEnd<Impl>::commitInsts()
break;
}
}
- n_committed_dist.sample(inst_num);
+ nCommittedDist.sample(inst_num);
}
template <class Impl>
@@ -1231,10 +1288,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
LSQ.squash(sn);
int freed_regs = 0;
- InstListIt waiting_list_end = waitingList.end();
+ InstListIt insts_end_it = waitingList.end();
InstListIt insts_it = waitingList.begin();
- while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+ while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
++insts_it;
@@ -1260,6 +1317,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
while (!instList.empty() && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
+ panic("Instruction should not be already squashed and on list!");
++insts_it;
continue;
}
@@ -1291,18 +1349,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
--numInsts;
}
- insts_it = waitingList.begin();
- while (!waitingList.empty() && insts_it != waitingList.end()) {
- if ((*insts_it)->seqNum < sn) {
- ++insts_it;
- continue;
- }
- assert((*insts_it)->isSquashed());
-
- waitingList.erase(insts_it++);
- waitingInsts--;
- }
-
while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
"squashed)\n", memBarrier->seqNum);
@@ -1320,6 +1366,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
}
}
+ insts_it = replayList.begin();
+ insts_end_it = replayList.end();
+ while (!replayList.empty() && insts_it != insts_end_it) {
+ if ((*insts_it)->seqNum < sn) {
+ ++insts_it;
+ continue;
+ }
+ assert((*insts_it)->isSquashed());
+
+ replayList.erase(insts_it++);
+ }
+
frontEnd->addFreeRegs(freed_regs);
}
@@ -1392,14 +1450,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
template <class Impl>
void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
- fetchHasFault = true;
-}
-
-template <class Impl>
-void
LWBackEnd<Impl>::switchOut()
{
switchPending = true;
@@ -1416,17 +1466,25 @@ LWBackEnd<Impl>::doSwitchOut()
// yet written back.
assert(robEmpty());
assert(!LSQ.hasStoresToWB());
-
+ writeback.clear();
+ for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+ numInstsToWB.advance();
+
+// squash(0);
+ assert(waitingList.empty());
+ assert(instList.empty());
+ assert(replayList.empty());
+ assert(writeback.empty());
LSQ.switchOut();
-
- squash(0);
}
template <class Impl>
void
LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
{
- switchedOut = false;
+ assert(!squashPending);
+ squashSeqNum = 0;
+ squashNextPC = 0;
tcSquash = false;
trapSquash = false;
@@ -1451,27 +1509,27 @@ LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
+ exeSwp[thread_number]++;
else
- exe_inst[thread_number]++;
+ exeInst[thread_number]++;
#else
- exe_inst[thread_number]++;
+ exeInst[thread_number]++;
#endif
//
// Control operations
//
if (inst->isControl())
- exe_branches[thread_number]++;
+ exeBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
- exe_refs[thread_number]++;
+ exeRefs[thread_number]++;
if (inst->isLoad())
- exe_loads[thread_number]++;
+ exeLoads[thread_number]++;
}
}
@@ -1491,33 +1549,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
- stat_com_swp[tid]++;
+ statComSwp[tid]++;
} else {
- stat_com_inst[tid]++;
+ statComInst[tid]++;
}
#else
- stat_com_inst[tid]++;
+ statComInst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
- stat_com_branches[tid]++;
+ statComBranches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
- stat_com_refs[tid]++;
+ statComRefs[tid]++;
if (inst->isLoad()) {
- stat_com_loads[tid]++;
+ statComLoads[tid]++;
}
}
if (inst->isMemBarrier()) {
- stat_com_membars[tid]++;
+ statComMembars[tid]++;
}
}
@@ -1569,6 +1627,45 @@ LWBackEnd<Impl>::dumpInsts()
++num;
}
+ inst_list_it = --(writeback.end());
+
+ cprintf("Writeback list size: %i\n", writeback.size());
+
+ while (inst_list_it != writeback.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
cprintf("Waiting list size: %i\n", waitingList.size());
inst_list_it = --(waitingList.end());
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 9a21a9d01..6640a9f34 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -84,6 +84,8 @@ class OzoneLWLSQ {
/** Returns the name of the LSQ unit. */
std::string name() const;
+ void regStats();
+
/** Sets the CPU pointer. */
void setCPU(OzoneCPU *cpu_ptr);
@@ -179,7 +181,7 @@ class OzoneLWLSQ {
int numLoads() { return loads; }
/** Returns the number of stores in the SQ. */
- int numStores() { return stores; }
+ int numStores() { return stores + storesInFlight; }
/** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); }
@@ -188,7 +190,7 @@ class OzoneLWLSQ {
bool lqFull() { return loads >= (LQEntries - 1); }
/** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
+ bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
@@ -223,7 +225,9 @@ class OzoneLWLSQ {
void storePostSend(Packet *pkt, DynInstPtr &inst);
/** Completes the store at the specified index. */
- void completeStore(int store_idx);
+ void completeStore(DynInstPtr &inst);
+
+ void removeStore(int store_idx);
/** Handles doing the retry. */
void recvRetry();
@@ -394,6 +398,10 @@ class OzoneLWLSQ {
int storesToWB;
+ public:
+ int storesInFlight;
+
+ private:
/// @todo Consider moving to a more advanced model with write vs read ports
/** The number of cache ports available each cycle. */
int cachePorts;
@@ -403,6 +411,9 @@ class OzoneLWLSQ {
//list<InstSeqNum> mshrSeqNums;
+ /** Tota number of memory ordering violations. */
+ Stats::Scalar<> lsqMemOrderViolation;
+
//Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
@@ -525,7 +536,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
store_size = (*sq_it).size;
- if (store_size == 0) {
+ if (store_size == 0 || (*sq_it).committed) {
sq_it++;
continue;
}
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index 7eef4b11f..4c96ad149 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -121,7 +121,7 @@ OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt)
}
if (inst->isStore()) {
- completeStore(state->idx);
+ completeStore(inst);
}
}
@@ -132,7 +132,7 @@ OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt)
template <class Impl>
OzoneLWLSQ<Impl>::OzoneLWLSQ()
: switchedOut(false), dcachePort(this), loads(0), stores(0),
- storesToWB(0), stalled(false), isStoreBlocked(false),
+ storesToWB(0), storesInFlight(0), stalled(false), isStoreBlocked(false),
isLoadBlocked(false), loadBlockedHandled(false)
{
}
@@ -173,6 +173,15 @@ OzoneLWLSQ<Impl>::name() const
template<class Impl>
void
+OzoneLWLSQ<Impl>::regStats()
+{
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+}
+
+template<class Impl>
+void
OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr)
{
cpu = cpu_ptr;
@@ -321,7 +330,7 @@ unsigned
OzoneLWLSQ<Impl>::numFreeEntries()
{
unsigned free_lq_entries = LQEntries - loads;
- unsigned free_sq_entries = SQEntries - stores;
+ unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
// Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries.
@@ -385,6 +394,9 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
// Actually probably want the oldest faulting load
if (load_fault != NoFault) {
DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
+ if (!(inst->req->getFlags() & UNCACHEABLE && !inst->isAtCommit())) {
+ inst->setExecuted();
+ }
// Maybe just set it as can commit here, although that might cause
// some other problems with sending traps to the ROB too quickly.
be->instToCommit(inst);
@@ -461,6 +473,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = (*lq_it);
+ ++lsqMemOrderViolation;
return TheISA::genMachineCheckFault();
}
@@ -553,8 +566,8 @@ OzoneLWLSQ<Impl>::writebackStores()
if ((*sq_it).size == 0 && !(*sq_it).completed) {
sq_it--;
- completeStore(inst->sqIdx);
-
+ removeStore(inst->sqIdx);
+ completeStore(inst);
continue;
}
@@ -626,6 +639,8 @@ OzoneLWLSQ<Impl>::writebackStores()
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
+ DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
+ storesInFlight + 1);
if (dcacheInterface) {
assert(!req->completionEvent);
@@ -687,6 +702,8 @@ OzoneLWLSQ<Impl>::writebackStores()
}
sq_it--;
}
+ ++storesInFlight;
+// removeStore(inst->sqIdx);
} else {
panic("Must HAVE DCACHE!!!!!\n");
}
@@ -704,7 +721,7 @@ void
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+ "(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
LQIt lq_it = loadQueue.begin();
@@ -881,7 +898,7 @@ OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
template <class Impl>
void
-OzoneLWLSQ<Impl>::completeStore(int store_idx)
+OzoneLWLSQ<Impl>::removeStore(int store_idx)
{
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
@@ -891,8 +908,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
(*sq_it).completed = true;
DynInstPtr inst = (*sq_it).inst;
- --storesToWB;
-
if (isStalled() &&
inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
@@ -910,6 +925,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
+{
+ --storesToWB;
--stores;
inst->setCompleted();
@@ -935,9 +957,14 @@ OzoneLWLSQ<Impl>::switchOut()
switchedOut = true;
// Clear the queue to free up resources
+ assert(stores == 0);
+ assert(storeQueue.empty());
+ assert(loads == 0);
+ assert(loadQueue.empty());
+ assert(storesInFlight == 0);
storeQueue.clear();
loadQueue.clear();
- loads = stores = storesToWB = 0;
+ loads = stores = storesToWB = storesInFlight = 0;
}
template <class Impl>
diff --git a/src/cpu/ozone/simple_params.hh b/src/cpu/ozone/simple_params.hh
index 11cee716f..3f63d2e1d 100644
--- a/src/cpu/ozone/simple_params.hh
+++ b/src/cpu/ozone/simple_params.hh
@@ -71,10 +71,11 @@ class SimpleParams : public BaseCPU::Params
unsigned cachePorts;
unsigned width;
+ unsigned frontEndLatency;
unsigned frontEndWidth;
+ unsigned backEndLatency;
unsigned backEndWidth;
unsigned backEndSquashLatency;
- unsigned backEndLatency;
unsigned maxInstBufferSize;
unsigned numPhysicalRegs;
unsigned maxOutstandingMemOps;
@@ -150,6 +151,7 @@ class SimpleParams : public BaseCPU::Params
//
unsigned LQEntries;
unsigned SQEntries;
+ bool lsqLimits;
//
// Memory dependence
diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh
index 8234cf938..c86f3552e 100644
--- a/src/cpu/ozone/thread_state.hh
+++ b/src/cpu/ozone/thread_state.hh
@@ -34,9 +34,12 @@
#include "arch/faults.hh"
#include "arch/types.hh"
#include "arch/regfile.hh"
+#include "base/callback.hh"
+#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "sim/process.hh"
+#include "sim/sim_exit.hh"
class Event;
//class Process;
@@ -65,8 +68,21 @@ struct OzoneThreadState : public ThreadState {
#if FULL_SYSTEM
OzoneThreadState(CPUType *_cpu, int _thread_num)
: ThreadState(-1, _thread_num),
- intrflag(0), inSyscall(0), trapPending(0)
+ intrflag(0), cpu(_cpu), inSyscall(0), trapPending(0)
{
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<OzoneThreadState,
+ &OzoneThreadState::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
miscRegFile.clear();
}
#else
@@ -130,6 +146,14 @@ struct OzoneThreadState : public ThreadState {
void setNextPC(uint64_t val)
{ nextPC = val; }
+
+#if FULL_SYSTEM
+ void dumpFuncProfile()
+ {
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(tc, *os);
+ }
+#endif
};
#endif // __CPU_OZONE_THREAD_STATE_HH__