summary | refs | log | tree | commit | diff
path: root/src/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- src/cpu/o3/alpha/cpu_builder.cc 12
-rw-r--r-- src/cpu/o3/alpha/thread_context.hh 15
-rw-r--r-- src/cpu/o3/commit.hh 14
-rw-r--r-- src/cpu/o3/commit_impl.hh 30
-rw-r--r-- src/cpu/o3/cpu.cc 333
-rw-r--r-- src/cpu/o3/cpu.hh 61
-rw-r--r-- src/cpu/o3/decode.hh 2
-rw-r--r-- src/cpu/o3/decode_impl.hh 3
-rw-r--r-- src/cpu/o3/fetch.hh 5
-rw-r--r-- src/cpu/o3/fetch_impl.hh 28
-rw-r--r-- src/cpu/o3/iew.hh 5
-rw-r--r-- src/cpu/o3/iew_impl.hh 8
-rw-r--r-- src/cpu/o3/lsq.hh 7
-rw-r--r-- src/cpu/o3/lsq_impl.hh 3
-rw-r--r-- src/cpu/o3/lsq_unit.hh 5
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh 4
-rwxr-xr-x src/cpu/o3/params.hh 6
-rw-r--r-- src/cpu/o3/regfile.hh 4
-rw-r--r-- src/cpu/o3/rename.hh 2
-rw-r--r-- src/cpu/o3/rename_impl.hh 3
-rw-r--r-- src/cpu/o3/rob.hh 2
-rw-r--r-- src/cpu/o3/rob_impl.hh 14
-rwxr-xr-x src/cpu/o3/thread_context.hh 2
-rwxr-xr-x src/cpu/o3/thread_context_impl.hh 16
24 files changed, 385 insertions, 199 deletions
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index d61eee4b1..5e767655d 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -102,7 +102,9 @@ Param<unsigned> renameToROBDelay;
Param<unsigned> commitWidth;
Param<unsigned> squashWidth;
Param<Tick> trapLatency;
-Param<Tick> fetchTrapLatency;
+
+Param<unsigned> backComSize;
+Param<unsigned> forwardComSize;
Param<std::string> predType;
Param<unsigned> localPredictorSize;
@@ -222,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(commitWidth, "Commit width"),
INIT_PARAM(squashWidth, "Squash width"),
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
- INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
+
+ INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+ INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
@@ -350,7 +354,9 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->commitWidth = commitWidth;
params->squashWidth = squashWidth;
params->trapLatency = trapLatency;
- params->fetchTrapLatency = fetchTrapLatency;
+
+ params->backComSize = backComSize;
+ params->forwardComSize = forwardComSize;
params->predType = predType;
params->localPredictorSize = localPredictorSize;
diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh
index 78b0ee788..ad52b0d2e 100644
--- a/src/cpu/o3/alpha/thread_context.hh
+++ b/src/cpu/o3/alpha/thread_context.hh
@@ -70,18 +70,19 @@ class AlphaTC : public O3ThreadContext<Impl>
{ panic("Not supported on Alpha!"); }
- // This function exits the thread context in the CPU and returns
- // 1 if the CPU has no more active threads (meaning it's OK to exit);
- // Used in syscall-emulation mode when a thread executes the 'exit'
- // syscall.
+ /** This function exits the thread context in the CPU and returns
+ * 1 if the CPU has no more active threads (meaning it's OK to exit)
+ * or 0 otherwise. Used in syscall-emulation mode when a thread
+ * executes the 'exit' syscall.
+ */
virtual int exit()
{
- this->cpu->deallocateContext(this->thread->readTid());
+ this->deallocate();
// If there are still threads executing in the system
if (this->cpu->numActiveThreads())
- return 0;
+ return 0; // don't exit simulation
else
- return 1;
+ return 1; // exit simulation
}
};
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 49ff5cdad..956b6ec3e 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -162,10 +162,6 @@ class DefaultCommit
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
- void setFetchStage(Fetch *fetch_stage);
-
- Fetch *fetchStage;
-
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
@@ -188,7 +184,7 @@ class DefaultCommit
void initStage();
/** Initializes the draining of commit. */
- void drain();
+ bool drain();
/** Resumes execution after draining. */
void resume();
@@ -335,10 +331,6 @@ class DefaultCommit
/** Vector of all of the threads. */
std::vector<Thread *> thread;
- Fault fetchFault;
-
- int fetchTrapWait;
-
/** Records that commit has written to the time buffer this cycle. Used for
* the CPU to determine if it can deschedule itself if there is no activity.
*/
@@ -397,10 +389,6 @@ class DefaultCommit
*/
Tick trapLatency;
- Tick fetchTrapLatency;
-
- Tick fetchFaultTick;
-
/** The commit PC of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 2eb05afac..904af1071 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -82,8 +82,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
numThreads(params->numberOfThreads),
drainPending(false),
switchedOut(false),
- trapLatency(params->trapLatency),
- fetchTrapLatency(params->fetchTrapLatency)
+ trapLatency(params->trapLatency)
{
_status = Active;
_nextStatus = Inactive;
@@ -123,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
tcSquash[i] = false;
PC[i] = nextPC[i] = 0;
}
-
- fetchFaultTick = 0;
- fetchTrapWait = 0;
}
template <class Impl>
@@ -235,7 +231,6 @@ DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
cpu->activateStage(O3CPU::CommitIdx);
trapLatency = cpu->cycles(trapLatency);
- fetchTrapLatency = cpu->cycles(fetchTrapLatency);
}
template <class Impl>
@@ -294,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
template <class Impl>
void
-DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
-{
- fetchStage = fetch_stage;
-}
-
-template <class Impl>
-void
DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
{
iewStage = iew_stage;
@@ -350,10 +338,18 @@ DefaultCommit<Impl>::initStage()
}
template <class Impl>
-void
+bool
DefaultCommit<Impl>::drain()
{
drainPending = true;
+
+ // If it's already drained, return true.
+ if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
+ cpu->signalDrained();
+ return true;
+ }
+
+ return false;
}
template <class Impl>
@@ -369,6 +365,7 @@ template <class Impl>
void
DefaultCommit<Impl>::resume()
{
+ drainPending = false;
}
template <class Impl>
@@ -569,6 +566,9 @@ DefaultCommit<Impl>::tick()
return;
}
+ if ((*activeThreads).size() <= 0)
+ return;
+
list<unsigned>::iterator threads = (*activeThreads).begin();
// Check if any of the threads are done squashing. Change the
@@ -582,7 +582,7 @@ DefaultCommit<Impl>::tick()
commitStatus[tid] = Running;
} else {
DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
- "insts this cycle.\n", tid);
+ " insts this cycle.\n", tid);
rob->doSquash(tid);
toIEW->commitInfo[tid].robSquashing = true;
wroteToTimeBuffer = true;
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index b182d5ca7..7d2727401 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -115,6 +115,36 @@ FullO3CPU<Impl>::ActivateThreadEvent::description()
}
template <class Impl>
+FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent()
+ : Event(&mainEventQueue, CPU_Tick_Pri)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num,
+ FullO3CPU<Impl> *thread_cpu)
+{
+ tid = thread_num;
+ cpu = thread_cpu;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::process()
+{
+ cpu->deactivateThread(tid); // take tid off the CPU's active threads list
+ cpu->removeThread(tid); // then remove the thread's context from the CPU
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::DeallocateContextEvent::description()
+{
+ return "FullO3CPU \"Deallocate Context\" event";
+}
+
+template <class Impl>
FullO3CPU<Impl>::FullO3CPU(Params *params)
: BaseO3CPU(params),
tickEvent(this),
@@ -141,15 +171,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
TheISA::NumMiscRegs * number_of_threads,
TheISA::ZeroReg),
- // For now just have these time buffers be pretty big.
- // @todo: Make these time buffer sizes parameters or derived
- // from latencies
- timeBuffer(5, 5),
- fetchQueue(5, 5),
- decodeQueue(5, 5),
- renameQueue(5, 5),
- iewQueue(5, 5),
- activityRec(NumStages, 10, params->activity),
+ timeBuffer(params->backComSize, params->forwardComSize),
+ fetchQueue(params->backComSize, params->forwardComSize),
+ decodeQueue(params->backComSize, params->forwardComSize),
+ renameQueue(params->backComSize, params->forwardComSize),
+ iewQueue(params->backComSize, params->forwardComSize),
+ activityRec(NumStages,
+ params->backComSize + params->forwardComSize,
+ params->activity),
globalSeqNum(1),
@@ -214,7 +243,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
commit.setIEWQueue(&iewQueue);
commit.setRenameQueue(&renameQueue);
- commit.setFetchStage(&fetch);
commit.setIEWStage(&iew);
rename.setIEWStage(&iew);
rename.setCommitStage(&commit);
@@ -361,6 +389,18 @@ FullO3CPU<Impl>::fullCPURegStats()
}
template <class Impl>
+Port *
+FullO3CPU<Impl>::getPort(const std::string &if_name, int idx)
+{
+ if (if_name == "dcache_port")
+ return iew.getDcachePort();
+ else if (if_name == "icache_port")
+ return fetch.getIcachePort();
+ else
+ panic("No Such Port\n");
+}
+
+template <class Impl>
void
FullO3CPU<Impl>::tick()
{
@@ -400,7 +440,8 @@ FullO3CPU<Impl>::tick()
}
if (!tickEvent.scheduled()) {
- if (_status == SwitchedOut) {
+ if (_status == SwitchedOut ||
+ getState() == SimObject::DrainedTiming) {
// increment stat
lastRunningCycle = curTick;
} else if (!activityRec.active()) {
@@ -461,6 +502,118 @@ FullO3CPU<Impl>::init()
template <class Impl>
void
+FullO3CPU<Impl>::activateThread(unsigned tid)
+{
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive == activeThreads.end()) {
+ DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
+ tid);
+
+ activeThreads.push_back(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deactivateThread(unsigned tid)
+{
+ //Remove From Active List, if Active
+ list<unsigned>::iterator thread_it =
+ find(activeThreads.begin(), activeThreads.end(), tid);
+
+ if (thread_it != activeThreads.end()) {
+ DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
+ tid);
+ activeThreads.erase(thread_it);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::activateContext(int tid, int delay)
+{
+ // Needs to set each stage to running as well.
+ if (delay){
+ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate "
+ "on cycle %d\n", tid, curTick + cycles(delay));
+ scheduleActivateThreadEvent(tid, delay);
+ } else {
+ activateThread(tid);
+ }
+
+ if(lastActivatedCycle < curTick) {
+ scheduleTickEvent(delay);
+
+ // Be sure to signal that there's some activity so the CPU doesn't
+ // deschedule itself.
+ activityRec.activity();
+ fetch.wakeFromQuiesce();
+
+ lastActivatedCycle = curTick;
+
+ _status = Running;
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deallocateContext(int tid, int delay)
+{
+ // With a non-zero delay, schedule removal of thread data from CPU
+ if (delay){
+ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
+ "on cycle %d\n", tid, curTick + cycles(delay));
+ scheduleDeallocateContextEvent(tid, delay);
+ } else { // no delay: deactivate and remove the thread right away
+ deactivateThread(tid);
+ removeThread(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::suspendContext(int tid)
+{
+ DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
+ unscheduleTickEvent();
+ _status = Idle;
+/*
+ //Remove From Active List, if Active
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive != activeThreads.end()) {
+ DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
+ tid);
+ activeThreads.erase(isActive);
+ }
+*/
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::haltContext(int tid)
+{
+ DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid);
+/*
+ //Remove From Active List, if Active
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive != activeThreads.end()) {
+ DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
+ tid);
+ activeThreads.erase(isActive);
+
+ removeThread(tid);
+ }
+*/
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::insertThread(unsigned tid)
{
DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU");
@@ -511,7 +664,7 @@ template <class Impl>
void
FullO3CPU<Impl>::removeThread(unsigned tid)
{
- DPRINTF(O3CPU,"[tid:%i] Removing thread from CPU.");
+ DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
// Copy Thread Data From RegFile
// If thread is suspended, it might be re-allocated
@@ -537,6 +690,8 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
fetch.squash(0,tid);
decode.squash(tid);
rename.squash(tid);
+ iew.squash(tid);
+ commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid);
assert(iew.ldstQueue.getCount(tid) == 0);
@@ -600,6 +755,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid)
//blocks fetch
contextSwitch = true;
+ //@todo: don't always add to waitlist
//do waitlist
cpuWaitList.push_back(tid);
}
@@ -607,112 +763,50 @@ FullO3CPU<Impl>::activateWhenReady(int tid)
template <class Impl>
void
-FullO3CPU<Impl>::activateThread(unsigned int tid)
-{
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
-
- if (isActive == activeThreads.end()) {
- DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
- tid);
-
- activeThreads.push_back(tid);
+FullO3CPU<Impl>::serialize(std::ostream &os)
+{
+ SERIALIZE_ENUM(_status);
+ BaseCPU::serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ nameOut(os, csprintf("%s.xc.%i", name(), i));
+ temp.copyTC(thread[i]->getTC());
+ temp.serialize(os);
}
}
-
template <class Impl>
void
-FullO3CPU<Impl>::activateContext(int tid, int delay)
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
- // Needs to set each stage to running as well.
- if (delay){
- DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate "
- "on cycle %d\n", tid, curTick + cycles(delay));
- scheduleActivateThreadEvent(tid, delay);
- } else {
- activateThread(tid);
- }
-
- if(lastActivatedCycle < curTick) {
- scheduleTickEvent(delay);
-
- // Be sure to signal that there's some activity so the CPU doesn't
- // deschedule itself.
- activityRec.activity();
- fetch.wakeFromQuiesce();
+ UNSERIALIZE_ENUM(_status);
+ BaseCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
- lastActivatedCycle = curTick;
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
- _status = Running;
+ for (int i = 0; i < thread.size(); i++) {
+ temp.copyTC(thread[i]->getTC());
+ temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+ thread[i]->getTC()->copyArchRegs(temp.getTC());
}
}
template <class Impl>
-void
-FullO3CPU<Impl>::suspendContext(int tid)
-{
- DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
- unscheduleTickEvent();
- _status = Idle;
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
-
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
- }
-*/
-}
-
-template <class Impl>
-void
-FullO3CPU<Impl>::deallocateContext(int tid)
-{
- DPRINTF(O3CPU,"[tid:%i]: Deallocating Thread Context", tid);
-
- //Remove From Active List, if Active
- list<unsigned>::iterator thread_it =
- find(activeThreads.begin(), activeThreads.end(), tid);
-
- if (thread_it != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(thread_it);
-
- removeThread(tid);
- }
-}
-
-template <class Impl>
-void
-FullO3CPU<Impl>::haltContext(int tid)
-{
- DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid);
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
-
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
-
- removeThread(tid);
- }
-*/
-}
-
-template <class Impl>
bool
FullO3CPU<Impl>::drain(Event *drain_event)
{
drainCount = 0;
- drainEvent = drain_event;
fetch.drain();
decode.drain();
rename.drain();
@@ -720,28 +814,40 @@ FullO3CPU<Impl>::drain(Event *drain_event)
commit.drain();
// Wake the CPU and record activity so everything can drain out if
- // the CPU is currently idle.
- wakeCPU();
- activityRec.activity();
+ // the CPU was not able to immediately drain.
+ if (getState() != SimObject::DrainedTiming) {
+ // A bit of a hack...set the drainEvent after all the drain()
+ // calls have been made, that way if all of the stages drain
+ // immediately, the signalDrained() function knows not to call
+ // process on the drain event.
+ drainEvent = drain_event;
+
+ wakeCPU();
+ activityRec.activity();
- return false;
+ return false;
+ } else {
+ return true;
+ }
}
template <class Impl>
void
FullO3CPU<Impl>::resume()
{
- if (_status == SwitchedOut)
- return;
fetch.resume();
decode.resume();
rename.resume();
iew.resume();
commit.resume();
+ if (_status == SwitchedOut || _status == Idle)
+ return;
+
if (!tickEvent.scheduled())
tickEvent.schedule(curTick);
_status = Running;
+ changeState(SimObject::Timing);
}
template <class Impl>
@@ -751,8 +857,13 @@ FullO3CPU<Impl>::signalDrained()
if (++drainCount == NumStages) {
if (tickEvent.scheduled())
tickEvent.squash();
- _status = Drained;
- drainEvent->process();
+
+ changeState(SimObject::DrainedTiming);
+
+ if (drainEvent) {
+ drainEvent->process();
+ drainEvent = NULL;
+ }
}
assert(drainCount <= 5);
}
@@ -781,7 +892,7 @@ void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
// Flush out any old data from the time buffers.
- for (int i = 0; i < 10; ++i) {
+ for (int i = 0; i < timeBuffer.getSize(); ++i) {
timeBuffer.advance();
fetchQueue.advance();
decodeQueue.advance();
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index cf3747601..2fbd013ac 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -111,7 +111,6 @@ class FullO3CPU : public BaseO3CPU
Idle,
Halted,
Blocked,
- Drained,
SwitchedOut
};
@@ -200,6 +199,49 @@ class FullO3CPU : public BaseO3CPU
/** The tick event used for scheduling CPU ticks. */
ActivateThreadEvent activateThreadEvent[Impl::MaxThreads];
+ class DeallocateContextEvent : public Event
+ {
+ private:
+ /** Number of the thread to deactivate and remove. */
+ int tid;
+
+ /** Pointer to the CPU. */
+ FullO3CPU<Impl> *cpu;
+
+ public:
+ /** Constructs the event. */
+ DeallocateContextEvent();
+
+ /** Initializes the event with the thread number and its CPU. */
+ void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
+
+ /** Processes the event, calling deactivateThread() and removeThread() on the CPU. */
+ void process();
+
+ /** Returns the description of the event. */
+ const char *description();
+ };
+
+ /** Schedule cpu to deallocate thread context 'delay' cycles from now. */
+ void scheduleDeallocateContextEvent(int tid, int delay)
+ {
+ // Reschedule a squashed event; otherwise schedule it unless already scheduled.
+ if (deallocateContextEvent[tid].squashed())
+ deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+ else if (!deallocateContextEvent[tid].scheduled())
+ deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+ }
+
+ /** Unschedule thread deallocation in CPU by squashing the event if it is scheduled. */
+ void unscheduleDeallocateContextEvent(int tid)
+ {
+ if (deallocateContextEvent[tid].scheduled())
+ deallocateContextEvent[tid].squash();
+ }
+
+ /** The events used for scheduling thread context deallocation, indexed by thread id. */
+ DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads];
+
public:
/** Constructs a CPU with the given parameters. */
FullO3CPU(Params *params);
@@ -209,6 +251,9 @@ class FullO3CPU : public BaseO3CPU
/** Registers statistics. */
void fullCPURegStats();
+ /** Returns a specific port. */
+ Port *getPort(const std::string &if_name, int idx);
+
/** Ticks CPU, calling tick() on each stage, and checking the overall
* activity to see if the CPU should deschedule itself.
*/
@@ -222,7 +267,10 @@ class FullO3CPU : public BaseO3CPU
{ return activeThreads.size(); }
/** Add Thread to Active Threads List */
- void activateThread(unsigned int tid);
+ void activateThread(unsigned tid);
+
+ /** Remove Thread from Active Threads List */
+ void deactivateThread(unsigned tid);
/** Setup CPU to insert a thread's context */
void insertThread(unsigned tid);
@@ -250,7 +298,7 @@ class FullO3CPU : public BaseO3CPU
/** Remove Thread from Active Threads List &&
* Remove Thread Context from CPU.
*/
- void deallocateContext(int tid);
+ void deallocateContext(int tid, int delay = 1);
/** Remove Thread from Active Threads List &&
* Remove Thread Context from CPU.
@@ -266,6 +314,13 @@ class FullO3CPU : public BaseO3CPU
/** Update The Order In Which We Process Threads. */
void updateThreadPriority();
+ /** Serialize state. */
+ virtual void serialize(std::ostream &os);
+
+ /** Unserialize from a checkpoint. */
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+ public:
/** Executes a syscall on this cycle.
* ---------------------------------------
* Note: this is a virtual function. CPU-Specific
diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index 1e96f1884..7f5ecbc26 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -110,7 +110,7 @@ class DefaultDecode
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Drains the decode stage. */
- void drain();
+ bool drain();
/** Resumes execution after a drain. */
void resume() { }
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 71637883b..8b851c032 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
}
template <class Impl>
-void
+bool
DefaultDecode<Impl>::drain()
{
// Decode is done draining at any time.
cpu->signalDrained();
+ return true;
}
template <class Impl>
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 9611f0455..85654cebc 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -162,6 +162,9 @@ class DefaultFetch
/** Registers statistics. */
void regStats();
+ /** Returns the icache port. */
+ Port *getIcachePort() { return icachePort; }
+
/** Sets CPU pointer. */
void setCPU(O3CPU *cpu_ptr);
@@ -181,7 +184,7 @@ class DefaultFetch
void processCacheCompletion(PacketPtr pkt);
/** Begins the drain of the fetch stage. */
- void drain();
+ bool drain();
/** Resumes execution after a drain. */
void resume();
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 500b5304e..de883b5ba 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -280,10 +280,6 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
// Name is finally available, so create the port.
icachePort = new IcachePort(this);
- Port *mem_dport = mem->getPort("");
- icachePort->setPeer(mem_dport);
- mem_dport->setPeer(icachePort);
-
#if USE_CHECKER
if (cpu->checker) {
cpu->checker->setIcachePort(icachePort);
@@ -354,22 +350,23 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
// to return.
if (fetchStatus[tid] != IcacheWaitResponse ||
pkt->req != memReq[tid] ||
- isSwitchedOut() ||
- drainPending) {
+ isSwitchedOut()) {
++fetchIcacheSquashes;
delete pkt->req;
delete pkt;
return;
}
- // Wake up the CPU (if it went to sleep and was waiting on this completion
- // event).
- cpu->wakeCPU();
+ if (!drainPending) {
+ // Wake up the CPU (if it went to sleep and was waiting on
+ // this completion event).
+ cpu->wakeCPU();
- DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
- tid);
+ DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
+ tid);
- switchToActive();
+ switchToActive();
+ }
// Only switch to IcacheAccessComplete if we're not stalled as well.
if (checkStall(tid)) {
@@ -385,12 +382,13 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
}
template <class Impl>
-void
+bool
DefaultFetch<Impl>::drain()
{
// Fetch is ready to drain at any time.
cpu->signalDrained();
drainPending = true;
+ return true;
}
template <class Impl>
@@ -508,7 +506,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (cacheBlocked || (interruptPending && flags == 0) || drainPending) {
+ if (cacheBlocked || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
@@ -908,7 +906,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
//////////////////////////////////////////
int tid = getFetchingThread(fetchPolicy);
- if (tid == -1) {
+ if (tid == -1 || drainPending) {
DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
// Breaks looping condition in tick()
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 774b6dcbd..fb9afde54 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -125,6 +125,9 @@ class DefaultIEW
/** Initializes stage; sends back the number of free IQ and LSQ entries. */
void initStage();
+ /** Returns the dcache port. */
+ Port *getDcachePort() { return ldstQueue.getDcachePort(); }
+
/** Sets CPU pointer for IEW, IQ, and LSQ. */
void setCPU(O3CPU *cpu_ptr);
@@ -144,7 +147,7 @@ class DefaultIEW
void setScoreboard(Scoreboard *sb_ptr);
/** Drains IEW stage. */
- void drain();
+ bool drain();
/** Resumes execution after a drain. */
void resume();
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index c3aa748ae..684ae2295 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -42,8 +42,7 @@ using namespace std;
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
- : // @todo: Make this into a parameter.
- issueToExecQueue(5, 5),
+ : issueToExecQueue(params->backComSize, params->forwardComSize),
instQueue(params),
ldstQueue(params),
fuPool(params->fuPool),
@@ -354,11 +353,12 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
}
template <class Impl>
-void
+bool
DefaultIEW<Impl>::drain()
{
// IEW is ready to drain at any time.
cpu->signalDrained();
+ return true;
}
template <class Impl>
@@ -412,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
// @todo: Fix hardcoded number
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
}
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 89791fec9..d5890950f 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -65,6 +65,13 @@ class LSQ {
/** Registers statistics of each LSQ unit. */
void regStats();
+ /** Returns dcache port.
+ * @todo: Dcache port needs to be moved up to this level for SMT
+ * to work. For now it just returns the port from one of the
+ * threads.
+ */
+ Port *getDcachePort() { return thread[0].getDcachePort(); }
+
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Sets the CPU pointer. */
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 5173f8be1..89fd1a71d 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -502,6 +502,9 @@ LSQ<Impl>::hasStoresToWB()
{
list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ if ((*activeThreads).empty())
+ return false;
+
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
if (!hasStoresToWB(tid))
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 74b8fe5bb..4d7a8350b 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -77,6 +77,11 @@ class LSQUnit {
/** Returns the name of the LSQ unit. */
std::string name() const;
+ /** Returns the dcache port.
+ * @todo: Remove this once the port moves up to the LSQ level.
+ */
+ Port *getDcachePort() { return dcachePort; }
+
/** Registers statistics. */
void regStats();
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index bb3da7eec..8e951534f 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -182,10 +182,6 @@ LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
cpu = cpu_ptr;
dcachePort = new DcachePort(cpu, this);
- Port *mem_dport = mem->getPort("");
- dcachePort->setPeer(mem_dport);
- mem_dport->setPeer(dcachePort);
-
#if USE_CHECKER
if (cpu->checker) {
cpu->checker->setDcachePort(dcachePort);
diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh
index ed53fa97a..1c234bcd7 100755
--- a/src/cpu/o3/params.hh
+++ b/src/cpu/o3/params.hh
@@ -115,6 +115,12 @@ class O3Params : public BaseO3CPU::Params
Tick fetchTrapLatency;
//
+ // Timebuffer sizes
+ //
+ unsigned backComSize;
+ unsigned forwardComSize;
+
+ //
// Branch predictor (BP, BTB, RAS)
//
std::string predType;
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 6972f055f..b6677b4b1 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -86,10 +86,6 @@ class PhysRegFile
//The duplication is unfortunate but it's better than having
//different ways to access certain registers.
- //Add these in later when everything else is in place
-// void serialize(std::ostream &os);
-// void unserialize(Checkpoint *cp, const std::string &section);
-
/** Reads an integer register. */
uint64_t readIntReg(PhysRegIndex reg_idx)
{
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 538dd9bb4..034087feb 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -158,7 +158,7 @@ class DefaultRename
void setScoreboard(Scoreboard *_scoreboard);
/** Drains the rename stage. */
- void drain();
+ bool drain();
/** Resumes execution after a drain. */
void resume() { }
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index fddbae3db..805a72808 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -257,11 +257,12 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
}
template <class Impl>
-void
+bool
DefaultRename<Impl>::drain()
{
// Rename is ready to switch out at any time.
cpu->signalDrained();
+ return true;
}
template <class Impl>
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 6f8080ef4..7cd5a5143 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -308,7 +308,7 @@ class ROB
private:
/** The sequence number of the squashed instruction. */
- InstSeqNum squashedSeqNum;
+ InstSeqNum squashedSeqNum[Impl::MaxThreads];
/** Is the ROB done squashing. */
bool doneSquashing[Impl::MaxThreads];
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index d9978b17f..1b9f666b8 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -41,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
: numEntries(_numEntries),
squashWidth(_squashWidth),
numInstsInROB(0),
- squashedSeqNum(0),
numThreads(_numThreads)
{
for (int tid=0; tid < numThreads; tid++) {
+ squashedSeqNum[tid] = 0;
doneSquashing[tid] = true;
threadEntries[tid] = 0;
}
@@ -352,11 +352,11 @@ void
ROB<Impl>::doSquash(unsigned tid)
{
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
- tid, squashedSeqNum);
+ tid, squashedSeqNum[tid]);
assert(squashIt[tid] != instList[tid].end());
- if ((*squashIt[tid])->seqNum < squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -371,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid)
for (int numSquashed = 0;
numSquashed < squashWidth &&
squashIt[tid] != instList[tid].end() &&
- (*squashIt[tid])->seqNum > squashedSeqNum;
+ (*squashIt[tid])->seqNum > squashedSeqNum[tid];
++numSquashed)
{
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
@@ -408,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid)
// Check if ROB is done squashing.
- if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
+ if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@@ -520,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doneSquashing[tid] = false;
- squashedSeqNum = squash_num;
+ squashedSeqNum[tid] = squash_num;
if (!instList[tid].empty()) {
InstIt tail_thread = instList[tid].end();
@@ -544,6 +544,7 @@ ROB<Impl>::readHeadInst()
}
}
*/
+
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst(unsigned tid)
@@ -558,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid)
return dummyInst;
}
}
+
/*
template <class Impl>
uint64_t
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index d097ee63e..df8d1a6d8 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -112,7 +112,7 @@ class O3ThreadContext : public ThreadContext
virtual void suspend();
/** Set the status to Unallocated. */
- virtual void deallocate();
+ virtual void deallocate(int delay = 0);
/** Set the status to Halted. */
virtual void halt();
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index cfb71f623..bf8cbf850 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -115,7 +115,8 @@ template <class Impl>
void
O3ThreadContext<Impl>::activate(int delay)
{
- DPRINTF(O3CPU, "Calling activate on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling activate on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Active)
return;
@@ -139,7 +140,8 @@ template <class Impl>
void
O3ThreadContext<Impl>::suspend()
{
- DPRINTF(O3CPU, "Calling suspend on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Suspended)
return;
@@ -163,22 +165,24 @@ O3ThreadContext<Impl>::suspend()
template <class Impl>
void
-O3ThreadContext<Impl>::deallocate()
+O3ThreadContext<Impl>::deallocate(int delay)
{
- DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Unallocated)
return;
thread->setStatus(ThreadContext::Unallocated);
- cpu->deallocateContext(thread->readTid());
+ cpu->deallocateContext(thread->readTid(), delay);
}
template <class Impl>
void
O3ThreadContext<Impl>::halt()
{
- DPRINTF(O3CPU, "Calling halt on AlphaTC\n");
+ DPRINTF(O3CPU, "Calling halt on Thread Context %d\n",
+ getThreadNum());
if (thread->status() == ThreadContext::Halted)
return;