summaryrefslogtreecommitdiff
path: root/src/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--src/cpu/o3/alpha/cpu_builder.cc21
-rw-r--r--src/cpu/o3/alpha/impl.hh4
-rw-r--r--src/cpu/o3/alpha/params.hh11
-rw-r--r--src/cpu/o3/alpha/thread_context.hh15
-rw-r--r--src/cpu/o3/commit.hh25
-rw-r--r--src/cpu/o3/commit_impl.hh63
-rw-r--r--src/cpu/o3/cpu.cc406
-rw-r--r--src/cpu/o3/cpu.hh85
-rw-r--r--src/cpu/o3/decode.hh8
-rw-r--r--src/cpu/o3/decode_impl.hh9
-rw-r--r--src/cpu/o3/dyn_inst.hh45
-rw-r--r--src/cpu/o3/fetch.hh25
-rw-r--r--src/cpu/o3/fetch_impl.hh57
-rw-r--r--src/cpu/o3/iew.hh72
-rw-r--r--src/cpu/o3/iew_impl.hh37
-rw-r--r--src/cpu/o3/inst_queue_impl.hh2
-rw-r--r--src/cpu/o3/lsq.hh64
-rw-r--r--src/cpu/o3/lsq_impl.hh56
-rw-r--r--src/cpu/o3/lsq_unit.hh84
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh57
-rwxr-xr-xsrc/cpu/o3/params.hh21
-rw-r--r--src/cpu/o3/regfile.hh4
-rw-r--r--src/cpu/o3/rename.hh9
-rw-r--r--src/cpu/o3/rename_impl.hh9
-rw-r--r--src/cpu/o3/rob.hh2
-rw-r--r--src/cpu/o3/rob_impl.hh14
-rwxr-xr-xsrc/cpu/o3/thread_context.hh2
-rwxr-xr-xsrc/cpu/o3/thread_context_impl.hh16
28 files changed, 817 insertions, 406 deletions
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 490305cbf..5e767655d 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -91,7 +91,10 @@ Param<unsigned> renameWidth;
Param<unsigned> commitToIEWDelay;
Param<unsigned> renameToIEWDelay;
Param<unsigned> issueToExecuteDelay;
+Param<unsigned> dispatchWidth;
Param<unsigned> issueWidth;
+Param<unsigned> wbWidth;
+Param<unsigned> wbDepth;
SimObjectParam<FUPool *> fuPool;
Param<unsigned> iewToCommitDelay;
@@ -99,7 +102,9 @@ Param<unsigned> renameToROBDelay;
Param<unsigned> commitWidth;
Param<unsigned> squashWidth;
Param<Tick> trapLatency;
-Param<Tick> fetchTrapLatency;
+
+Param<unsigned> backComSize;
+Param<unsigned> forwardComSize;
Param<std::string> predType;
Param<unsigned> localPredictorSize;
@@ -207,7 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
"Issue/Execute/Writeback delay"),
INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
"to the IEW stage)"),
+ INIT_PARAM(dispatchWidth, "Dispatch width"),
INIT_PARAM(issueWidth, "Issue width"),
+ INIT_PARAM(wbWidth, "Writeback width"),
+ INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
@@ -216,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(commitWidth, "Commit width"),
INIT_PARAM(squashWidth, "Squash width"),
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
- INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
+
+ INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+ INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
@@ -333,7 +343,10 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->commitToIEWDelay = commitToIEWDelay;
params->renameToIEWDelay = renameToIEWDelay;
params->issueToExecuteDelay = issueToExecuteDelay;
+ params->dispatchWidth = dispatchWidth;
params->issueWidth = issueWidth;
+ params->wbWidth = wbWidth;
+ params->wbDepth = wbDepth;
params->fuPool = fuPool;
params->iewToCommitDelay = iewToCommitDelay;
@@ -341,7 +354,9 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->commitWidth = commitWidth;
params->squashWidth = squashWidth;
params->trapLatency = trapLatency;
- params->fetchTrapLatency = fetchTrapLatency;
+
+ params->backComSize = backComSize;
+ params->forwardComSize = forwardComSize;
params->predType = predType;
params->localPredictorSize = localPredictorSize;
diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh
index 8cd8692c6..b928ae654 100644
--- a/src/cpu/o3/alpha/impl.hh
+++ b/src/cpu/o3/alpha/impl.hh
@@ -36,6 +36,7 @@
#include "cpu/o3/alpha/params.hh"
#include "cpu/o3/cpu_policy.hh"
+
// Forward declarations.
template <class Impl>
class AlphaDynInst;
@@ -88,7 +89,4 @@ struct AlphaSimpleImpl
/** The O3Impl to be used. */
typedef AlphaSimpleImpl O3CPUImpl;
-/** The O3Impl to be used. */
-typedef DynInst O3DynInst;
-
#endif // __CPU_O3_ALPHA_IMPL_HH__
diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh
index b1f2a487d..c618cee08 100644
--- a/src/cpu/o3/alpha/params.hh
+++ b/src/cpu/o3/alpha/params.hh
@@ -54,16 +54,7 @@ class AlphaSimpleParams : public O3Params
#if FULL_SYSTEM
AlphaITB *itb;
AlphaDTB *dtb;
-#else
- std::vector<Process *> workload;
- Process *process;
-#endif // FULL_SYSTEM
-
- MemObject *mem;
-
- BaseCPU *checker;
-
- unsigned decodeToFetchDelay;
+#endif
};
#endif // __CPU_O3_ALPHA_PARAMS_HH__
diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh
index 78b0ee788..ad52b0d2e 100644
--- a/src/cpu/o3/alpha/thread_context.hh
+++ b/src/cpu/o3/alpha/thread_context.hh
@@ -70,18 +70,19 @@ class AlphaTC : public O3ThreadContext<Impl>
{ panic("Not supported on Alpha!"); }
- // This function exits the thread context in the CPU and returns
- // 1 if the CPU has no more active threads (meaning it's OK to exit);
- // Used in syscall-emulation mode when a thread executes the 'exit'
- // syscall.
+ /** This function exits the thread context in the CPU and returns
+ * 1 if the CPU has no more active threads (meaning it's OK to exit);
+ * Used in syscall-emulation mode when a thread executes the 'exit'
+ * syscall.
+ */
virtual int exit()
{
- this->cpu->deallocateContext(this->thread->readTid());
+ this->deallocate();
// If there are still threads executing in the system
if (this->cpu->numActiveThreads())
- return 0;
+ return 0; // don't exit simulation
else
- return 1;
+ return 1; // exit simulation
}
};
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 60b555269..956b6ec3e 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -162,10 +162,6 @@ class DefaultCommit
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
- void setFetchStage(Fetch *fetch_stage);
-
- Fetch *fetchStage;
-
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
@@ -187,11 +183,14 @@ class DefaultCommit
/** Initializes stage by sending back the number of free entries. */
void initStage();
- /** Initializes the switching out of commit. */
- void switchOut();
+ /** Initializes the draining of commit. */
+ bool drain();
+
+ /** Resumes execution after draining. */
+ void resume();
/** Completes the switch out of commit. */
- void doSwitchOut();
+ void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
@@ -332,10 +331,6 @@ class DefaultCommit
/** Vector of all of the threads. */
std::vector<Thread *> thread;
- Fault fetchFault;
-
- int fetchTrapWait;
-
/** Records that commit has written to the time buffer this cycle. Used for
* the CPU to determine if it can deschedule itself if there is no activity.
*/
@@ -383,8 +378,8 @@ class DefaultCommit
/** Number of Active Threads */
unsigned numThreads;
- /** Is a switch out pending. */
- bool switchPending;
+ /** Is a drain pending. */
+ bool drainPending;
/** Is commit switched out. */
bool switchedOut;
@@ -394,10 +389,6 @@ class DefaultCommit
*/
Tick trapLatency;
- Tick fetchTrapLatency;
-
- Tick fetchFaultTick;
-
/** The commit PC of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 06b8e8a95..c667d633a 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -80,10 +80,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads),
- switchPending(false),
+ drainPending(false),
switchedOut(false),
- trapLatency(params->trapLatency),
- fetchTrapLatency(params->fetchTrapLatency)
+ trapLatency(params->trapLatency)
{
_status = Active;
_nextStatus = Inactive;
@@ -123,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
tcSquash[i] = false;
PC[i] = nextPC[i] = 0;
}
-
- fetchFaultTick = 0;
- fetchTrapWait = 0;
}
template <class Impl>
@@ -235,7 +231,6 @@ DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
cpu->activateStage(O3CPU::CommitIdx);
trapLatency = cpu->cycles(trapLatency);
- fetchTrapLatency = cpu->cycles(fetchTrapLatency);
}
template <class Impl>
@@ -294,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
template <class Impl>
void
-DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
-{
- fetchStage = fetch_stage;
-}
-
-template <class Impl>
-void
DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
{
iewStage = iew_stage;
@@ -350,23 +338,38 @@ DefaultCommit<Impl>::initStage()
}
template <class Impl>
-void
-DefaultCommit<Impl>::switchOut()
+bool
+DefaultCommit<Impl>::drain()
{
- switchPending = true;
+ drainPending = true;
+
+ // If it's already drained, return true.
+ if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
+ cpu->signalDrained();
+ return true;
+ }
+
+ return false;
}
template <class Impl>
void
-DefaultCommit<Impl>::doSwitchOut()
+DefaultCommit<Impl>::switchOut()
{
switchedOut = true;
- switchPending = false;
+ drainPending = false;
rob->switchOut();
}
template <class Impl>
void
+DefaultCommit<Impl>::resume()
+{
+ drainPending = false;
+}
+
+template <class Impl>
+void
DefaultCommit<Impl>::takeOverFrom()
{
switchedOut = false;
@@ -557,11 +560,15 @@ DefaultCommit<Impl>::tick()
wroteToTimeBuffer = false;
_nextStatus = Inactive;
- if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) {
- cpu->signalSwitched();
+ if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) {
+ cpu->signalDrained();
+ drainPending = false;
return;
}
+ if ((*activeThreads).size() <= 0)
+ return;
+
list<unsigned>::iterator threads = (*activeThreads).begin();
// Check if any of the threads are done squashing. Change the
@@ -575,7 +582,7 @@ DefaultCommit<Impl>::tick()
commitStatus[tid] = Running;
} else {
DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
- "insts this cycle.\n", tid);
+ " insts this cycle.\n", tid);
rob->doSquash(tid);
toIEW->commitInfo[tid].robSquashing = true;
wroteToTimeBuffer = true;
@@ -989,6 +996,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Check if the instruction caused a fault. If so, trap.
Fault inst_fault = head_inst->getFault();
+ // DTB will sometimes need the machine instruction for when
+ // faults happen. So we will set it here, prior to the DTB
+ // possibly needing it for its fault.
+ thread[tid]->setInst(
+ static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
+
if (inst_fault != NoFault) {
head_inst->setCompleted();
DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
@@ -1011,12 +1024,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// execution doesn't generate extra squashes.
thread[tid]->inSyscall = true;
- // DTB will sometimes need the machine instruction for when
- // faults happen. So we will set it here, prior to the DTB
- // possibly needing it for its fault.
- thread[tid]->setInst(
- static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
-
// Execute the trap. Although it's slightly unrealistic in
// terms of timing (as it doesn't wait for the full timing of
// the trap event to complete before updating state), it's
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index feca4cdf2..c43cc2cf8 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -115,6 +115,36 @@ FullO3CPU<Impl>::ActivateThreadEvent::description()
}
template <class Impl>
+FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent()
+ : Event(&mainEventQueue, CPU_Tick_Pri)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num,
+ FullO3CPU<Impl> *thread_cpu)
+{
+ tid = thread_num;
+ cpu = thread_cpu;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::process()
+{
+ cpu->deactivateThread(tid);
+ cpu->removeThread(tid);
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::DeallocateContextEvent::description()
+{
+ return "FullO3CPU \"Deallocate Context\" event";
+}
+
+template <class Impl>
FullO3CPU<Impl>::FullO3CPU(Params *params)
: BaseO3CPU(params),
tickEvent(this),
@@ -141,15 +171,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
TheISA::NumMiscRegs * number_of_threads,
TheISA::ZeroReg),
- // For now just have these time buffers be pretty big.
- // @todo: Make these time buffer sizes parameters or derived
- // from latencies
- timeBuffer(5, 5),
- fetchQueue(5, 5),
- decodeQueue(5, 5),
- renameQueue(5, 5),
- iewQueue(5, 5),
- activityRec(NumStages, 10, params->activity),
+ timeBuffer(params->backComSize, params->forwardComSize),
+ fetchQueue(params->backComSize, params->forwardComSize),
+ decodeQueue(params->backComSize, params->forwardComSize),
+ renameQueue(params->backComSize, params->forwardComSize),
+ iewQueue(params->backComSize, params->forwardComSize),
+ activityRec(NumStages,
+ params->backComSize + params->forwardComSize,
+ params->activity),
globalSeqNum(1),
@@ -158,7 +187,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
physmem(system->physmem),
#endif // FULL_SYSTEM
mem(params->mem),
- switchCount(0),
+ drainCount(0),
deferRegistration(params->deferRegistration),
numThreads(number_of_threads)
{
@@ -214,7 +243,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
commit.setIEWQueue(&iewQueue);
commit.setRenameQueue(&renameQueue);
- commit.setFetchStage(&fetch);
commit.setIEWStage(&iew);
rename.setIEWStage(&iew);
rename.setCommitStage(&commit);
@@ -361,6 +389,18 @@ FullO3CPU<Impl>::fullCPURegStats()
}
template <class Impl>
+Port *
+FullO3CPU<Impl>::getPort(const std::string &if_name, int idx)
+{
+ if (if_name == "dcache_port")
+ return iew.getDcachePort();
+ else if (if_name == "icache_port")
+ return fetch.getIcachePort();
+ else
+ panic("No Such Port\n");
+}
+
+template <class Impl>
void
FullO3CPU<Impl>::tick()
{
@@ -400,7 +440,8 @@ FullO3CPU<Impl>::tick()
}
if (!tickEvent.scheduled()) {
- if (_status == SwitchedOut) {
+ if (_status == SwitchedOut ||
+ getState() == SimObject::Drained) {
// increment stat
lastRunningCycle = curTick;
} else if (!activityRec.active()) {
@@ -461,16 +502,107 @@ FullO3CPU<Impl>::init()
template <class Impl>
void
+FullO3CPU<Impl>::activateThread(unsigned tid)
+{
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive == activeThreads.end()) {
+ DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
+ tid);
+
+ activeThreads.push_back(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deactivateThread(unsigned tid)
+{
+ //Remove From Active List, if Active
+ list<unsigned>::iterator thread_it =
+ find(activeThreads.begin(), activeThreads.end(), tid);
+
+ if (thread_it != activeThreads.end()) {
+ DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
+ tid);
+ activeThreads.erase(thread_it);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::activateContext(int tid, int delay)
+{
+ // Needs to set each stage to running as well.
+ if (delay){
+ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate "
+ "on cycle %d\n", tid, curTick + cycles(delay));
+ scheduleActivateThreadEvent(tid, delay);
+ } else {
+ activateThread(tid);
+ }
+
+ if(lastActivatedCycle < curTick) {
+ scheduleTickEvent(delay);
+
+ // Be sure to signal that there's some activity so the CPU doesn't
+ // deschedule itself.
+ activityRec.activity();
+ fetch.wakeFromQuiesce();
+
+ lastActivatedCycle = curTick;
+
+ _status = Running;
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deallocateContext(int tid, int delay)
+{
+ // Schedule removal of thread data from CPU
+ if (delay){
+ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
+ "on cycle %d\n", tid, curTick + cycles(delay));
+ scheduleDeallocateContextEvent(tid, delay);
+ } else {
+ deactivateThread(tid);
+ removeThread(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::suspendContext(int tid)
+{
+ DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
+ deactivateThread(tid);
+ if (activeThreads.size() == 0)
+ unscheduleTickEvent();
+ _status = Idle;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::haltContext(int tid)
+{
+ //For now, this is the same as deallocate
+ DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
+ deallocateContext(tid, 1);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::insertThread(unsigned tid)
{
- DPRINTF(O3CPU,"[tid:%i] Initializing thread data");
+ DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU");
// Will change now that the PC and thread state is internal to the CPU
// and not in the ThreadContext.
-#if 0
#if FULL_SYSTEM
ThreadContext *src_tc = system->threadContexts[tid];
#else
- ThreadContext *src_tc = thread[tid];
+ ThreadContext *src_tc = tcBase(tid);
#endif
//Bind Int Regs to Rename Map
@@ -490,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid)
}
//Copy Thread Data Into RegFile
- this->copyFromTC(tid);
+ //this->copyFromTC(tid);
- //Set PC/NPC
- regFile.pc[tid] = src_tc->readPC();
- regFile.npc[tid] = src_tc->readNextPC();
+ //Set PC/NPC/NNPC
+ setPC(src_tc->readPC(), tid);
+ setNextPC(src_tc->readNextPC(), tid);
+#if THE_ISA != ALPHA_ISA
+ setNextNPC(src_tc->readNextNPC(), tid);
+#endif
src_tc->setStatus(ThreadContext::Active);
@@ -503,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid)
//Reset ROB/IQ/LSQ Entries
commit.rob->resetEntries();
iew.resetEntries();
-#endif
}
template <class Impl>
void
FullO3CPU<Impl>::removeThread(unsigned tid)
{
- DPRINTF(O3CPU,"[tid:%i] Removing thread data");
-#if 0
- //Unbind Int Regs from Rename Map
+ DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
+
+ // Copy Thread Data From RegFile
+ // If thread is suspended, it might be re-allocated
+ //this->copyToTC(tid);
+
+ // Unbind Int Regs from Rename Map
for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
PhysRegIndex phys_reg = renameMap[tid].lookup(ireg);
@@ -520,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
freeList.addReg(phys_reg);
}
- //Unbind Float Regs from Rename Map
+ // Unbind Float Regs from Rename Map
for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
PhysRegIndex phys_reg = renameMap[tid].lookup(freg);
@@ -528,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
freeList.addReg(phys_reg);
}
- //Copy Thread Data From RegFile
- /* Fix Me:
- * Do we really need to do this if we are removing a thread
- * in the sense that it's finished (exiting)? If the thread is just
- * being suspended we might...
- */
-// this->copyToTC(tid);
-
- //Squash Throughout Pipeline
+ // Squash Throughout Pipeline
fetch.squash(0,tid);
decode.squash(tid);
rename.squash(tid);
+ iew.squash(tid);
+ commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid);
assert(iew.ldstQueue.getCount(tid) == 0);
- //Reset ROB/IQ/LSQ Entries
+ // Reset ROB/IQ/LSQ Entries
if (activeThreads.size() >= 1) {
commit.rob->resetEntries();
iew.resetEntries();
}
-#endif
}
@@ -604,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid)
//blocks fetch
contextSwitch = true;
+ //@todo: dont always add to waitlist
//do waitlist
cpuWaitList.push_back(tid);
}
@@ -611,149 +743,132 @@ FullO3CPU<Impl>::activateWhenReady(int tid)
template <class Impl>
void
-FullO3CPU<Impl>::activateThread(unsigned int tid)
-{
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
-
- if (isActive == activeThreads.end()) {
- DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
- tid);
-
- activeThreads.push_back(tid);
+FullO3CPU<Impl>::serialize(std::ostream &os)
+{
+ SimObject::State so_state = SimObject::getState();
+ SERIALIZE_ENUM(so_state);
+ BaseCPU::serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ nameOut(os, csprintf("%s.xc.%i", name(), i));
+ temp.copyTC(thread[i]->getTC());
+ temp.serialize(os);
}
}
-
template <class Impl>
void
-FullO3CPU<Impl>::activateContext(int tid, int delay)
-{
- // Needs to set each stage to running as well.
- if (delay){
- DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate "
- "on cycle %d\n", tid, curTick + cycles(delay));
- scheduleActivateThreadEvent(tid, delay);
- } else {
- activateThread(tid);
- }
-
- if(lastActivatedCycle < curTick) {
- scheduleTickEvent(delay);
-
- // Be sure to signal that there's some activity so the CPU doesn't
- // deschedule itself.
- activityRec.activity();
- fetch.wakeFromQuiesce();
-
- lastActivatedCycle = curTick;
-
- _status = Running;
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+ SimObject::State so_state;
+ UNSERIALIZE_ENUM(so_state);
+ BaseCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ temp.copyTC(thread[i]->getTC());
+ temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+ thread[i]->getTC()->copyArchRegs(temp.getTC());
}
}
template <class Impl>
-void
-FullO3CPU<Impl>::suspendContext(int tid)
+unsigned int
+FullO3CPU<Impl>::drain(Event *drain_event)
{
- DPRINTF(O3CPU,"[tid: %i]: Suspended ...\n", tid);
- unscheduleTickEvent();
- _status = Idle;
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
+ drainCount = 0;
+ fetch.drain();
+ decode.drain();
+ rename.drain();
+ iew.drain();
+ commit.drain();
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
+ // Wake the CPU and record activity so everything can drain out if
+ // the CPU was not able to immediately drain.
+ if (getState() != SimObject::Drained) {
+ // A bit of a hack...set the drainEvent after all the drain()
+ // calls have been made, that way if all of the stages drain
+ // immediately, the signalDrained() function knows not to call
+ // process on the drain event.
+ drainEvent = drain_event;
+
+ wakeCPU();
+ activityRec.activity();
+
+ return 1;
+ } else {
+ return 0;
}
-*/
}
template <class Impl>
void
-FullO3CPU<Impl>::deallocateContext(int tid)
+FullO3CPU<Impl>::resume()
{
- DPRINTF(O3CPU,"[tid:%i]: Deallocating ...", tid);
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
+ assert(system->getMemoryMode() == System::Timing);
+ fetch.resume();
+ decode.resume();
+ rename.resume();
+ iew.resume();
+ commit.resume();
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
+ changeState(SimObject::Running);
- removeThread(tid);
- }
-*/
+ if (_status == SwitchedOut || _status == Idle)
+ return;
+
+ if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick);
+ _status = Running;
}
template <class Impl>
void
-FullO3CPU<Impl>::haltContext(int tid)
+FullO3CPU<Impl>::signalDrained()
{
- DPRINTF(O3CPU,"[tid:%i]: Halted ...", tid);
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
+ if (++drainCount == NumStages) {
+ if (tickEvent.scheduled())
+ tickEvent.squash();
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
+ changeState(SimObject::Drained);
- removeThread(tid);
+ if (drainEvent) {
+ drainEvent->process();
+ drainEvent = NULL;
+ }
}
-*/
+ assert(drainCount <= 5);
}
template <class Impl>
void
-FullO3CPU<Impl>::switchOut(Sampler *_sampler)
+FullO3CPU<Impl>::switchOut()
{
- sampler = _sampler;
- switchCount = 0;
fetch.switchOut();
- decode.switchOut();
rename.switchOut();
- iew.switchOut();
commit.switchOut();
+ instList.clear();
+ while (!removeList.empty()) {
+ removeList.pop();
+ }
- // Wake the CPU and record activity so everything can drain out if
- // the CPU is currently idle.
- wakeCPU();
- activityRec.activity();
-}
-
-template <class Impl>
-void
-FullO3CPU<Impl>::signalSwitched()
-{
- if (++switchCount == NumStages) {
- fetch.doSwitchOut();
- rename.doSwitchOut();
- commit.doSwitchOut();
- instList.clear();
- while (!removeList.empty()) {
- removeList.pop();
- }
-
+ _status = SwitchedOut;
#if USE_CHECKER
- if (checker)
- checker->switchOut(sampler);
+ if (checker)
+ checker->switchOut();
#endif
-
- if (tickEvent.scheduled())
- tickEvent.squash();
- sampler->signalSwitched();
- _status = SwitchedOut;
- }
- assert(switchCount <= 5);
}
template <class Impl>
@@ -761,7 +876,7 @@ void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
// Flush out any old data from the time buffers.
- for (int i = 0; i < 10; ++i) {
+ for (int i = 0; i < timeBuffer.getSize(); ++i) {
timeBuffer.advance();
fetchQueue.advance();
decodeQueue.advance();
@@ -932,7 +1047,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatReg(phys_reg, val);
}
@@ -941,7 +1057,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatReg(phys_reg, val, 64);
}
@@ -950,7 +1067,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegBits(phys_reg, val);
}
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 1cff6142d..83cb966e3 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -57,6 +57,8 @@ class Checker;
class ThreadContext;
template <class>
class O3ThreadContext;
+
+class Checkpoint;
class MemObject;
class Process;
@@ -197,6 +199,49 @@ class FullO3CPU : public BaseO3CPU
/** The tick event used for scheduling CPU ticks. */
ActivateThreadEvent activateThreadEvent[Impl::MaxThreads];
+ class DeallocateContextEvent : public Event
+ {
+ private:
+ /** Number of Thread to Activate */
+ int tid;
+
+ /** Pointer to the CPU. */
+ FullO3CPU<Impl> *cpu;
+
+ public:
+ /** Constructs the event. */
+ DeallocateContextEvent();
+
+ /** Initialize Event */
+ void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
+
+ /** Processes the event, calling activateThread() on the CPU. */
+ void process();
+
+ /** Returns the description of the event. */
+ const char *description();
+ };
+
+ /** Schedule cpu to deallocate thread context.*/
+ void scheduleDeallocateContextEvent(int tid, int delay)
+ {
+ // Schedule thread to activate, regardless of its current state.
+ if (deallocateContextEvent[tid].squashed())
+ deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+ else if (!deallocateContextEvent[tid].scheduled())
+ deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+ }
+
+ /** Unschedule thread deallocation in CPU */
+ void unscheduleDeallocateContextEvent(int tid)
+ {
+ if (deallocateContextEvent[tid].scheduled())
+ deallocateContextEvent[tid].squash();
+ }
+
+ /** The tick event used for scheduling CPU ticks. */
+ DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads];
+
public:
/** Constructs a CPU with the given parameters. */
FullO3CPU(Params *params);
@@ -206,6 +251,9 @@ class FullO3CPU : public BaseO3CPU
/** Registers statistics. */
void fullCPURegStats();
+ /** Returns a specific port. */
+ Port *getPort(const std::string &if_name, int idx);
+
/** Ticks CPU, calling tick() on each stage, and checking the overall
* activity to see if the CPU should deschedule itself.
*/
@@ -219,7 +267,10 @@ class FullO3CPU : public BaseO3CPU
{ return activeThreads.size(); }
/** Add Thread to Active Threads List */
- void activateThread(unsigned int tid);
+ void activateThread(unsigned tid);
+
+ /** Remove Thread from Active Threads List */
+ void deactivateThread(unsigned tid);
/** Setup CPU to insert a thread's context */
void insertThread(unsigned tid);
@@ -247,7 +298,7 @@ class FullO3CPU : public BaseO3CPU
/** Remove Thread from Active Threads List &&
* Remove Thread Context from CPU.
*/
- void deallocateContext(int tid);
+ void deallocateContext(int tid, int delay = 1);
/** Remove Thread from Active Threads List &&
* Remove Thread Context from CPU.
@@ -263,6 +314,13 @@ class FullO3CPU : public BaseO3CPU
/** Update The Order In Which We Process Threads. */
void updateThreadPriority();
+ /** Serialize state. */
+ virtual void serialize(std::ostream &os);
+
+ /** Unserialize from a checkpoint. */
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+ public:
/** Executes a syscall on this cycle.
* ---------------------------------------
* Note: this is a virtual function. CPU-Specific
@@ -270,14 +328,21 @@ class FullO3CPU : public BaseO3CPU
*/
virtual void syscall(int tid) { panic("Unimplemented!"); }
- /** Switches out this CPU. */
- void switchOut(Sampler *sampler);
+ /** Starts draining the CPU's pipeline of all instructions in
+ * order to stop all memory accesses. */
+ virtual unsigned int drain(Event *drain_event);
+
+ /** Resumes execution after a drain. */
+ virtual void resume();
/** Signals to this CPU that a stage has completed switching out. */
- void signalSwitched();
+ void signalDrained();
+
+ /** Switches out this CPU. */
+ virtual void switchOut();
/** Takes over from another CPU. */
- void takeOverFrom(BaseCPU *oldCPU);
+ virtual void takeOverFrom(BaseCPU *oldCPU);
/** Get the current instruction sequence number, and increment it. */
InstSeqNum getAndIncrementInstSeq()
@@ -550,11 +615,11 @@ class FullO3CPU : public BaseO3CPU
/** Pointer to memory. */
MemObject *mem;
- /** Pointer to the sampler */
- Sampler *sampler;
+ /** Event to call process() on once draining has completed. */
+ Event *drainEvent;
- /** Counter of how many stages have completed switching out. */
- int switchCount;
+ /** Counter of how many stages have completed draining. */
+ int drainCount;
/** Pointers to all of the threads in the CPU. */
std::vector<Thread *> thread;
diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index 1edf3335d..7f5ecbc26 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -109,8 +109,14 @@ class DefaultDecode
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
+ /** Drains the decode stage. */
+ bool drain();
+
+ /** Resumes execution after a drain. */
+ void resume() { }
+
/** Switches out the decode stage. */
- void switchOut();
+ void switchOut() { }
/** Takes over from another CPU's thread. */
void takeOverFrom();
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 16be01784..8b851c032 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
}
template <class Impl>
-void
-DefaultDecode<Impl>::switchOut()
+bool
+DefaultDecode<Impl>::drain()
{
- // Decode can immediately switch out.
- cpu->signalSwitched();
+ // Decode is done draining at any time.
+ cpu->signalDrained();
+ return true;
}
template <class Impl>
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
new file mode 100644
index 000000000..a2cdf2dba
--- /dev/null
+++ b/src/cpu/o3/dyn_inst.hh
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Korey Sewell
+ */
+
+#ifndef __CPU_O3_DYN_INST_HH__
+#define __CPU_O3_DYN_INST_HH__
+
+#include "arch/isa_specific.hh"
+
+#if THE_ISA == ALPHA_ISA
+template <class Impl>
+class AlphaDynInst;
+
+struct AlphaSimpleImpl;
+
+typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst;
+#endif
+
+#endif // __CPU_O3_DYN_INST_HH__
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 7fcd21b7d..931919af8 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -40,8 +40,6 @@
#include "mem/port.hh"
#include "sim/eventq.hh"
-class Sampler;
-
/**
* DefaultFetch class handles both single threaded and SMT fetch. Its
* width is specified by the parameters; each cycle it tries to fetch
@@ -164,6 +162,9 @@ class DefaultFetch
/** Registers statistics. */
void regStats();
+ /** Returns the icache port. */
+ Port *getIcachePort() { return icachePort; }
+
/** Sets CPU pointer. */
void setCPU(O3CPU *cpu_ptr);
@@ -182,11 +183,14 @@ class DefaultFetch
/** Processes cache completion event. */
void processCacheCompletion(PacketPtr pkt);
- /** Begins the switch out of the fetch stage. */
- void switchOut();
+ /** Begins the drain of the fetch stage. */
+ bool drain();
+
+ /** Resumes execution after a drain. */
+ void resume();
- /** Completes the switch out of the fetch stage. */
- void doSwitchOut();
+ /** Tells fetch stage to prepare to be switched out. */
+ void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
@@ -400,6 +404,12 @@ class DefaultFetch
/** The cache line being fetched. */
uint8_t *cacheData[Impl::MaxThreads];
+ /** The PC of the cacheline that has been loaded. */
+ Addr cacheDataPC[Impl::MaxThreads];
+
+ /** Whether or not the cache data is valid. */
+ bool cacheDataValid[Impl::MaxThreads];
+
/** Size of instructions. */
int instSize;
@@ -423,6 +433,9 @@ class DefaultFetch
*/
bool interruptPending;
+ /** Is there a drain pending. */
+ bool drainPending;
+
/** Records if fetch is switched out. */
bool switchedOut;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 60eb76d17..4184e1867 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -109,6 +109,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
numThreads(params->numberOfThreads),
numFetchingThreads(params->smtNumFetchingThreads),
interruptPending(false),
+ drainPending(false),
switchedOut(false)
{
if (numThreads > Impl::MaxThreads)
@@ -161,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
stalls[tid].decode = 0;
stalls[tid].rename = 0;
@@ -279,10 +282,6 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
// Name is finally available, so create the port.
icachePort = new IcachePort(this);
- Port *mem_dport = mem->getPort("");
- icachePort->setPeer(mem_dport);
- mem_dport->setPeer(icachePort);
-
#if USE_CHECKER
if (cpu->checker) {
cpu->checker->setIcachePort(icachePort);
@@ -360,14 +359,19 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
return;
}
- // Wake up the CPU (if it went to sleep and was waiting on this completion
- // event).
- cpu->wakeCPU();
+ memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
+ cacheDataValid[tid] = true;
- DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
- tid);
+ if (!drainPending) {
+ // Wake up the CPU (if it went to sleep and was waiting on
+ // this completion event).
+ cpu->wakeCPU();
- switchToActive();
+ DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
+ tid);
+
+ switchToActive();
+ }
// Only switch to IcacheAccessComplete if we're not stalled as well.
if (checkStall(tid)) {
@@ -383,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
}
template <class Impl>
+bool
+DefaultFetch<Impl>::drain()
+{
+ // Fetch is ready to drain at any time.
+ cpu->signalDrained();
+ drainPending = true;
+ return true;
+}
+
+template <class Impl>
void
-DefaultFetch<Impl>::switchOut()
+DefaultFetch<Impl>::resume()
{
- // Fetch is ready to switch out at any time.
- switchedOut = true;
- cpu->signalSwitched();
+ drainPending = false;
}
template <class Impl>
void
-DefaultFetch<Impl>::doSwitchOut()
+DefaultFetch<Impl>::switchOut()
{
+ switchedOut = true;
// Branch predictor needs to have its state cleared.
branchPred.switchOut();
}
@@ -498,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) {
+ if (cacheBlocked || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
@@ -509,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Align the fetch PC so it's at the start of a cache block.
fetch_PC = icacheBlockAlignPC(fetch_PC);
+ // If we've already got the block, no need to try to fetch it again.
+ if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+ return true;
+ }
+
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
// Build request here.
@@ -540,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Build packet here.
PacketPtr data_pkt = new Packet(mem_req,
Packet::ReadReq, Packet::Broadcast);
- data_pkt->dataStatic(cacheData[tid]);
+ data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
+
+ cacheDataPC[tid] = fetch_PC;
+ cacheDataValid[tid] = false;
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
@@ -898,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
//////////////////////////////////////////
int tid = getFetchingThread(fetchPolicy);
- if (tid == -1) {
+ if (tid == -1 || drainPending) {
DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
// Breaks looping condition in tick()
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 2af68d8fc..6c6be4787 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -31,11 +31,12 @@
#ifndef __CPU_O3_IEW_HH__
#define __CPU_O3_IEW_HH__
+#include "config/full_system.hh"
+
#include <queue>
#include "base/statistics.hh"
#include "base/timebuf.hh"
-#include "config/full_system.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/scoreboard.hh"
#include "cpu/o3/lsq.hh"
@@ -125,6 +126,9 @@ class DefaultIEW
/** Initializes stage; sends back the number of free IQ and LSQ entries. */
void initStage();
+ /** Returns the dcache port. */
+ Port *getDcachePort() { return ldstQueue.getDcachePort(); }
+
/** Sets CPU pointer for IEW, IQ, and LSQ. */
void setCPU(O3CPU *cpu_ptr);
@@ -143,11 +147,14 @@ class DefaultIEW
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *sb_ptr);
- /** Starts switch out of IEW stage. */
- void switchOut();
+ /** Drains IEW stage. */
+ bool drain();
+
+ /** Resumes execution after a drain. */
+ void resume();
/** Completes switch out of IEW stage. */
- void doSwitchOut();
+ void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
@@ -204,6 +211,45 @@ class DefaultIEW
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
+ void incrWb(InstSeqNum &sn)
+ {
+ if (++wbOutstanding == wbMax)
+ ableToIssue = false;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+#ifdef DEBUG
+ wbList.insert(sn);
+#endif
+ }
+
+ void decrWb(InstSeqNum &sn)
+ {
+ if (wbOutstanding-- == wbMax)
+ ableToIssue = true;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+#ifdef DEBUG
+ assert(wbList.find(sn) != wbList.end());
+ wbList.erase(sn);
+#endif
+ }
+
+#ifdef DEBUG
+ std::set<InstSeqNum> wbList;
+
+ void dumpWb()
+ {
+ std::set<InstSeqNum>::iterator wb_it = wbList.begin();
+ while (wb_it != wbList.end()) {
+ cprintf("[sn:%lli]\n",
+ (*wb_it));
+ wb_it++;
+ }
+ }
+#endif
+
+ bool canIssue() { return ableToIssue; }
+
+ bool ableToIssue;
+
private:
/** Sends commit proper information for a squash due to a branch
* mispredict.
@@ -384,11 +430,8 @@ class DefaultIEW
*/
unsigned issueToExecuteDelay;
- /** Width of issue's read path, in instructions. The read path is both
- * the skid buffer and the rename instruction queue.
- * Note to self: is this really different than issueWidth?
- */
- unsigned issueReadWidth;
+ /** Width of dispatch, in instructions. */
+ unsigned dispatchWidth;
/** Width of issue, in instructions. */
unsigned issueWidth;
@@ -403,6 +446,17 @@ class DefaultIEW
*/
unsigned wbCycle;
+ /** Number of instructions in flight that will writeback. */
+ unsigned wbOutstanding;
+
+ /** Writeback width. */
+ unsigned wbWidth;
+
+ /** Writeback width * writeback depth, where writeback depth is
+ * the number of cycles of writing back instructions that can be
+ * buffered. */
+ unsigned wbMax;
+
/** Number of active threads. */
unsigned numThreads;
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 8e6fd46a1..684ae2295 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -42,16 +42,17 @@ using namespace std;
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
- : // @todo: Make this into a parameter.
- issueToExecQueue(5, 5),
+ : issueToExecQueue(params->backComSize, params->forwardComSize),
instQueue(params),
ldstQueue(params),
fuPool(params->fuPool),
commitToIEWDelay(params->commitToIEWDelay),
renameToIEWDelay(params->renameToIEWDelay),
issueToExecuteDelay(params->issueToExecuteDelay),
- issueReadWidth(params->issueWidth),
+ dispatchWidth(params->dispatchWidth),
issueWidth(params->issueWidth),
+ wbOutstanding(0),
+ wbWidth(params->wbWidth),
numThreads(params->numberOfThreads),
switchedOut(false)
{
@@ -74,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
fetchRedirect[i] = false;
}
+ wbMax = wbWidth * params->wbDepth;
+
updateLSQNextCycle = false;
+ ableToIssue = true;
+
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
}
@@ -348,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
}
template <class Impl>
+bool
+DefaultIEW<Impl>::drain()
+{
+ // IEW is ready to drain at any time.
+ cpu->signalDrained();
+ return true;
+}
+
+template <class Impl>
void
-DefaultIEW<Impl>::switchOut()
+DefaultIEW<Impl>::resume()
{
- // IEW is ready to switch out at any time.
- cpu->signalSwitched();
}
template <class Impl>
void
-DefaultIEW<Impl>::doSwitchOut()
+DefaultIEW<Impl>::switchOut()
{
// Clear any state.
switchedOut = true;
@@ -400,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
// @todo: Fix hardcoded number
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
}
@@ -559,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
// free slot.
while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
++wbNumInst;
- if (wbNumInst == issueWidth) {
+ if (wbNumInst == wbWidth) {
++wbCycle;
wbNumInst = 0;
}
- assert(wbCycle < 5);
+ assert((wbCycle * wbWidth + wbNumInst) < wbMax);
}
// Add finished instruction to queue to commit.
@@ -937,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
// Loop through the instructions, putting them in the instruction
// queue.
for ( ; dis_num_inst < insts_to_add &&
- dis_num_inst < issueReadWidth;
+ dis_num_inst < dispatchWidth;
++dis_num_inst)
{
inst = insts_to_dispatch.front();
@@ -1189,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts()
++iewExecSquashedInsts;
+ decrWb(inst->seqNum);
continue;
}
@@ -1351,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts()
}
writebackCount[tid]++;
}
+
+ decrWb(inst->seqNum);
}
}
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index b99bd0900..36e0842be 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -687,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int total_issued = 0;
while (total_issued < totalWidth &&
+ iewStage->canIssue() &&
order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
@@ -784,6 +785,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
listOrder.erase(order_it++);
statIssuedInstType[tid][op_class]++;
+ iewStage->incrWb(issuing_inst->seqNum);
} else {
statFuBusy[op_class]++;
fuBusy[tid]++;
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 89791fec9..190734dc2 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -65,6 +65,13 @@ class LSQ {
/** Registers statistics of each LSQ unit. */
void regStats();
+ /** Returns dcache port.
+ * @todo: Dcache port needs to be moved up to this level for SMT
+ * to work. For now it just returns the port from one of the
+ * threads.
+ */
+ Port *getDcachePort() { return &dcachePort; }
+
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Sets the CPU pointer. */
@@ -251,6 +258,15 @@ class LSQ {
bool willWB(unsigned tid)
{ return thread[tid].willWB(); }
+ /** Returns if the cache is currently blocked. */
+ bool cacheBlocked()
+ { return retryTid != -1; }
+
+ /** Sets the retry thread id, indicating that one of the LSQUnits
+ * tried to access the cache but the cache was blocked. */
+ void setRetryTid(int tid)
+ { retryTid = tid; }
+
/** Debugging function to print out all instructions. */
void dumpInsts();
/** Debugging function to print out instructions from a specific thread. */
@@ -267,7 +283,49 @@ class LSQ {
template <class T>
Fault write(RequestPtr req, T &data, int store_idx);
- private:
+ /** DcachePort class for this LSQ. Handles doing the
+ * communication with the cache/memory.
+ */
+ class DcachePort : public Port
+ {
+ protected:
+ /** Pointer to LSQ. */
+ LSQ *lsq;
+
+ public:
+ /** Default constructor. */
+ DcachePort(LSQ *_lsq)
+ : lsq(_lsq)
+ { }
+
+ protected:
+ /** Atomic version of receive. Panics. */
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ /** Functional version of receive. Panics. */
+ virtual void recvFunctional(PacketPtr pkt);
+
+ /** Receives status change. Other than range changing, panics. */
+ virtual void recvStatusChange(Status status);
+
+ /** Returns the address ranges of this device. */
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ /** Timing version of receive. Handles writing back and
+ * completing the load or store that has returned from
+ * memory. */
+ virtual bool recvTiming(PacketPtr pkt);
+
+ /** Handles doing a retry of the previous send. */
+ virtual void recvRetry();
+ };
+
+ /** D-cache port. */
+ DcachePort dcachePort;
+
+ protected:
/** The LSQ policy for SMT mode. */
LSQPolicy lsqPolicy;
@@ -296,6 +354,10 @@ class LSQ {
/** Number of Threads. */
unsigned numThreads;
+
+ /** The thread id of the LSQ Unit that is currently waiting for a
+ * retry. */
+ int retryTid;
};
template <class Impl>
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 5173f8be1..4e3957029 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -36,9 +36,53 @@
using namespace std;
template <class Impl>
+Tick
+LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!");
+ return curTick;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("O3CPU doesn't expect recvStatusChange callback!");
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+ return true;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvRetry()
+{
+ lsq->thread[lsq->retryTid].recvRetry();
+ // Speculatively clear the retry Tid. This will get set again if
+ // the LSQUnit was unable to complete its access.
+ lsq->retryTid = -1;
+}
+
+template <class Impl>
LSQ<Impl>::LSQ(Params *params)
- : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
- numThreads(params->numberOfThreads)
+ : dcachePort(this), LQEntries(params->LQEntries),
+ SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
+ retryTid(-1)
{
DPRINTF(LSQ, "Creating LSQ object.\n");
@@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params)
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
- thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+ thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+ thread[tid].setDcachePort(&dcachePort);
}
}
@@ -130,6 +175,8 @@ LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
{
cpu = cpu_ptr;
+ dcachePort.setName(name());
+
for (int tid=0; tid < numThreads; tid++) {
thread[tid].setCPU(cpu_ptr);
}
@@ -502,6 +549,9 @@ LSQ<Impl>::hasStoresToWB()
{
list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ if ((*activeThreads).empty())
+ return false;
+
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
if (!hasStoresToWB(tid))
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 74b8fe5bb..512b5a63c 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -64,6 +64,7 @@ class LSQUnit {
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::LSQ LSQ;
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
public:
@@ -71,7 +72,7 @@ class LSQUnit {
LSQUnit();
/** Initializes the LSQ unit with the specified number of entries. */
- void init(Params *params, unsigned maxLQEntries,
+ void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id);
/** Returns the name of the LSQ unit. */
@@ -87,6 +88,10 @@ class LSQUnit {
void setIEW(IEW *iew_ptr)
{ iewStage = iew_ptr; }
+ /** Sets the pointer to the dcache port. */
+ void setDcachePort(Port *dcache_port)
+ { dcachePort = dcache_port; }
+
/** Switches out LSQ unit. */
void switchOut();
@@ -206,6 +211,9 @@ class LSQUnit {
!storeQueue[storeWBIdx].completed &&
!isStoreBlocked; }
+ /** Handles doing the retry. */
+ void recvRetry();
+
private:
/** Writes back the instruction, sending it to IEW. */
void writeback(DynInstPtr &inst, PacketPtr pkt);
@@ -216,9 +224,6 @@ class LSQUnit {
/** Completes the store at the specified index. */
void completeStore(int store_idx);
- /** Handles doing the retry. */
- void recvRetry();
-
/** Increments the given store index (circular queue). */
inline void incrStIdx(int &store_idx);
/** Decrements the given store index (circular queue). */
@@ -239,54 +244,11 @@ class LSQUnit {
/** Pointer to the IEW stage. */
IEW *iewStage;
- /** Pointer to memory object. */
- MemObject *mem;
+ /** Pointer to the LSQ. */
+ LSQ *lsq;
- /** DcachePort class for this LSQ Unit. Handles doing the
- * communication with the cache/memory.
- * @todo: Needs to be moved to the LSQ level and have some sort
- * of arbitration.
- */
- class DcachePort : public Port
- {
- protected:
- /** Pointer to CPU. */
- O3CPU *cpu;
- /** Pointer to LSQ. */
- LSQUnit *lsq;
-
- public:
- /** Default constructor. */
- DcachePort(O3CPU *_cpu, LSQUnit *_lsq)
- : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
- { }
-
- protected:
- /** Atomic version of receive. Panics. */
- virtual Tick recvAtomic(PacketPtr pkt);
-
- /** Functional version of receive. Panics. */
- virtual void recvFunctional(PacketPtr pkt);
-
- /** Receives status change. Other than range changing, panics. */
- virtual void recvStatusChange(Status status);
-
- /** Returns the address ranges of this device. */
- virtual void getDeviceAddressRanges(AddrRangeList &resp,
- AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
-
- /** Timing version of receive. Handles writing back and
- * completing the load or store that has returned from
- * memory. */
- virtual bool recvTiming(PacketPtr pkt);
-
- /** Handles doing a retry of the previous send. */
- virtual void recvRetry();
- };
-
- /** Pointer to the D-cache. */
- DcachePort *dcachePort;
+ /** Pointer to the dcache port. Used only for sending. */
+ Port *dcachePort;
/** Derived class to hold any sender state the LSQ needs. */
class LSQSenderState : public Packet::SenderState
@@ -639,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
+ iewStage->decrWb(load_inst->seqNum);
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
@@ -653,7 +616,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
}
// If there's no forwarding case, then go access memory
- DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+ DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
load_inst->seqNum, load_inst->readPC());
assert(!load_inst->memData);
@@ -661,9 +624,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
++usedPorts;
- DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
- load_inst->readPC());
-
PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
data_pkt->dataStatic(load_inst->memData);
@@ -673,8 +633,18 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
state->inst = load_inst;
data_pkt->senderState = state;
- // if we have a cache, do cache access too
- if (!dcachePort->sendTiming(data_pkt)) {
+ // if we the cache is not blocked, do cache access
+ if (!lsq->cacheBlocked()) {
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // If the access didn't succeed, tell the LSQ by setting
+ // the retry thread id.
+ lsq->setRetryTid(lsqID);
+ }
+ }
+
+ // If the cache was blocked, or has become blocked due to the access,
+ // handle it.
+ if (lsq->cacheBlocked()) {
++lsqCacheBlocked;
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 714acb2ef..4f5dbbf1c 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -31,6 +31,7 @@
#include "config/use_checker.hh"
+#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "base/str.hh"
#include "mem/packet.hh"
@@ -77,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
if (isSwitchedOut() || inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
delete state;
delete pkt;
return;
@@ -95,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
template <class Impl>
-Tick
-LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
-{
- panic("O3CPU model does not work with atomic mode!");
- return curTick;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
-{
- panic("O3CPU doesn't expect recvFunctional callback!");
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
-{
- if (status == RangeChange)
- return;
-
- panic("O3CPU doesn't expect recvStatusChange callback!");
-}
-
-template <class Impl>
-bool
-LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
-{
- lsq->completeDataAccess(pkt);
- return true;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvRetry()
-{
- lsq->recvRetry();
-}
-
-template <class Impl>
LSQUnit<Impl>::LSQUnit()
: loads(0), stores(0), storesToWB(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
@@ -144,13 +106,15 @@ LSQUnit<Impl>::LSQUnit()
template<class Impl>
void
-LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id)
{
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
switchedOut = false;
+ lsq = lsq_ptr;
+
lsqID = id;
// Add 1 for the sentinel entry (they are circular queues).
@@ -167,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
usedPorts = 0;
cachePorts = params->cachePorts;
- mem = params->mem;
-
memDepViolator = NULL;
blockedLoadSeqNum = 0;
@@ -179,11 +141,6 @@ void
LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
{
cpu = cpu_ptr;
- dcachePort = new DcachePort(cpu, this);
-
- Port *mem_dport = mem->getPort("");
- dcachePort->setPeer(mem_dport);
- mem_dport->setPeer(dcachePort);
#if USE_CHECKER
if (cpu->checker) {
@@ -591,7 +548,7 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].canWB &&
usedPorts < cachePorts) {
- if (isStoreBlocked) {
+ if (isStoreBlocked || lsq->cacheBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
break;
@@ -833,6 +790,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
// Squashed instructions do not need to complete their access.
if (inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
assert(!inst->isStore());
++lsqIgnoredResponses;
return;
@@ -914,6 +872,7 @@ LSQUnit<Impl>::recvRetry()
} else {
// Still blocked!
++lsqCacheBlocked;
+ lsq->setRetryTid(lsqID);
}
} else if (isLoadBlocked) {
DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh
index 69a1bb937..1c234bcd7 100755
--- a/src/cpu/o3/params.hh
+++ b/src/cpu/o3/params.hh
@@ -47,6 +47,18 @@ class O3Params : public BaseO3CPU::Params
unsigned activity;
//
+ // Pointers to key objects
+ //
+#if !FULL_SYSTEM
+ std::vector<Process *> workload;
+ Process *process;
+#endif // FULL_SYSTEM
+
+ MemObject *mem;
+
+ BaseCPU *checker;
+
+ //
// Caches
//
// MemInterface *icacheInterface;
@@ -86,7 +98,10 @@ class O3Params : public BaseO3CPU::Params
unsigned commitToIEWDelay;
unsigned renameToIEWDelay;
unsigned issueToExecuteDelay;
+ unsigned dispatchWidth;
unsigned issueWidth;
+ unsigned wbWidth;
+ unsigned wbDepth;
FUPool *fuPool;
//
@@ -100,6 +115,12 @@ class O3Params : public BaseO3CPU::Params
Tick fetchTrapLatency;
//
+ // Timebuffer sizes
+ //
+ unsigned backComSize;
+ unsigned forwardComSize;
+
+ //
// Branch predictor (BP, BTB, RAS)
//
std::string predType;
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 6972f055f..b6677b4b1 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -86,10 +86,6 @@ class PhysRegFile
//The duplication is unfortunate but it's better than having
//different ways to access certain registers.
- //Add these in later when everything else is in place
-// void serialize(std::ostream &os);
-// void unserialize(Checkpoint *cp, const std::string &section);
-
/** Reads an integer register. */
uint64_t readIntReg(PhysRegIndex reg_idx)
{
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 581fc8f81..034087feb 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -157,12 +157,15 @@ class DefaultRename
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *_scoreboard);
+ /** Drains the rename stage. */
+ bool drain();
+
+ /** Resumes execution after a drain. */
+ void resume() { }
+
/** Switches out the rename stage. */
void switchOut();
- /** Completes the switch out. */
- void doSwitchOut();
-
/** Takes over from another CPU's thread. */
void takeOverFrom();
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index df8b7f9da..805a72808 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
}
template <class Impl>
-void
-DefaultRename<Impl>::switchOut()
+bool
+DefaultRename<Impl>::drain()
{
// Rename is ready to switch out at any time.
- cpu->signalSwitched();
+ cpu->signalDrained();
+ return true;
}
template <class Impl>
void
-DefaultRename<Impl>::doSwitchOut()
+DefaultRename<Impl>::switchOut()
{
// Clear any state, fix up the rename map.
for (int i = 0; i < numThreads; i++) {
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 6f8080ef4..7cd5a5143 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -308,7 +308,7 @@ class ROB
private:
/** The sequence number of the squashed instruction. */
- InstSeqNum squashedSeqNum;
+ InstSeqNum squashedSeqNum[Impl::MaxThreads];
/** Is the ROB done squashing. */
bool doneSquashing[Impl::MaxThreads];
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index d9978b17f..1b9f666b8 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -41,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
: numEntries(_numEntries),
squashWidth(_squashWidth),
numInstsInROB(0),
- squashedSeqNum(0),
numThreads(_numThreads)
{
for (int tid=0; tid < numThreads; tid++) {
+ squashedSeqNum[tid] = 0;
doneSquashing[tid] = true;
threadEntries[tid] = 0;
}
@@ -352,11 +352,11 @@ void
ROB<Impl>::doSquash(unsigned tid)
{
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
- tid, squashedSeqNum);
+ tid, squashedSeqNum[tid]);
assert(squashIt[tid] != instList[tid].end());
- if ((*squashIt[tid])->seqNum < squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -371,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid)
for (int numSquashed = 0;
numSquashed < squashWidth &&
squashIt[tid] != instList[tid].end() &&
- (*squashIt[tid])->seqNum > squashedSeqNum;
+ (*squashIt[tid])->seqNum > squashedSeqNum[tid];
++numSquashed)
{
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
@@ -408,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid)
// Check if ROB is done squashing.
- if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -520,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doneSquashing[tid] = false;
- squashedSeqNum = squash_num;
+ squashedSeqNum[tid] = squash_num;
if (!instList[tid].empty()) {
InstIt tail_thread = instList[tid].end();
@@ -544,6 +544,7 @@ ROB<Impl>::readHeadInst()
}
}
*/
+
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst(unsigned tid)
@@ -558,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid)
return dummyInst;
}
}
+
/*
template <class Impl>
uint64_t
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index d097ee63e..df8d1a6d8 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -112,7 +112,7 @@ class O3ThreadContext : public ThreadContext
virtual void suspend();
/** Set the status to Unallocated. */
- virtual void deallocate();
+ virtual void deallocate(int delay = 0);
/** Set the status to Halted. */
virtual void halt();
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index cfb71f623..bf8cbf850 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -115,7 +115,8 @@ template <class Impl>
void
O3ThreadContext<Impl>::activate(int delay)
{
- DPRINTF(O3CPU, "Calling activate on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling activate on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Active)
return;
@@ -139,7 +140,8 @@ template <class Impl>
void
O3ThreadContext<Impl>::suspend()
{
- DPRINTF(O3CPU, "Calling suspend on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Suspended)
return;
@@ -163,22 +165,24 @@ O3ThreadContext<Impl>::suspend()
template <class Impl>
void
-O3ThreadContext<Impl>::deallocate()
+O3ThreadContext<Impl>::deallocate(int delay)
{
- DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Unallocated)
return;
thread->setStatus(ThreadContext::Unallocated);
- cpu->deallocateContext(thread->readTid());
+ cpu->deallocateContext(thread->readTid(), delay);
}
template <class Impl>
void
O3ThreadContext<Impl>::halt()
{
- DPRINTF(O3CPU, "Calling halt on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling halt on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Halted)
return;