summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitch Hayenga <mitch.hayenga@arm.com>2016-07-21 17:19:16 +0100
committerMitch Hayenga <mitch.hayenga@arm.com>2016-07-21 17:19:16 +0100
commitff4009ac005be0347015f8ba5a8e37a3aa930e69 (patch)
treeb80cfa7c70c0e39f54c8c3d78527722cb6658510
parent8a476d387c84f037d0ccf3cc20dc88870ab45fec (diff)
downloadgem5-ff4009ac005be0347015f8ba5a8e37a3aa930e69.tar.xz
cpu: Add SMT support to MinorCPU
This patch adds SMT support to the MinorCPU. Currently RoundRobin or Random thread scheduling are supported. Change-Id: I91faf39ff881af5918cca05051829fc6261f20e3
-rw-r--r--src/cpu/minor/MinorCPU.py4
-rw-r--r--src/cpu/minor/cpu.cc59
-rw-r--r--src/cpu/minor/cpu.hh23
-rw-r--r--src/cpu/minor/decode.cc161
-rw-r--r--src/cpu/minor/decode.hh63
-rw-r--r--src/cpu/minor/dyn_inst.cc6
-rw-r--r--src/cpu/minor/exec_context.hh13
-rw-r--r--src/cpu/minor/execute.cc668
-rw-r--r--src/cpu/minor/execute.hh128
-rw-r--r--src/cpu/minor/fetch1.cc273
-rw-r--r--src/cpu/minor/fetch1.hh73
-rw-r--r--src/cpu/minor/fetch2.cc251
-rw-r--r--src/cpu/minor/fetch2.hh121
-rw-r--r--src/cpu/minor/lsq.cc56
-rw-r--r--src/cpu/minor/lsq.hh8
-rw-r--r--src/cpu/minor/pipe_data.cc9
-rw-r--r--src/cpu/minor/pipe_data.hh15
-rw-r--r--src/cpu/minor/pipeline.cc9
-rw-r--r--src/cpu/minor/pipeline.hh2
-rw-r--r--src/sim/pseudo_inst.cc2
-rw-r--r--util/minorview/minor.pic12
21 files changed, 1255 insertions, 701 deletions
diff --git a/src/cpu/minor/MinorCPU.py b/src/cpu/minor/MinorCPU.py
index 9ab7b0b39..2c80af175 100644
--- a/src/cpu/minor/MinorCPU.py
+++ b/src/cpu/minor/MinorCPU.py
@@ -169,6 +169,8 @@ class MinorDefaultFUPool(MinorFUPool):
MinorDefaultFloatSimdFU(), MinorDefaultMemFU(),
MinorDefaultMiscFU()]
+class ThreadPolicy(Enum): vals = ['SingleThreaded', 'RoundRobin', 'Random']
+
class MinorCPU(BaseCPU):
type = 'MinorCPU'
cxx_header = "cpu/minor/cpu.hh"
@@ -185,6 +187,8 @@ class MinorCPU(BaseCPU):
def support_take_over(cls):
return True
+ threadPolicy = Param.ThreadPolicy('RoundRobin',
+ "Thread scheduling policy")
fetch1FetchLimit = Param.Unsigned(1,
"Number of line fetches allowable in flight at once")
fetch1LineSnapWidth = Param.Unsigned(0,
diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc
index 79807a2a7..016a60f47 100644
--- a/src/cpu/minor/cpu.cc
+++ b/src/cpu/minor/cpu.cc
@@ -47,32 +47,33 @@
#include "debug/Quiesce.hh"
MinorCPU::MinorCPU(MinorCPUParams *params) :
- BaseCPU(params)
+ BaseCPU(params),
+ threadPolicy(params->threadPolicy)
{
/* This is only written for one thread at the moment */
Minor::MinorThread *thread;
- if (FullSystem) {
- thread = new Minor::MinorThread(this, 0, params->system, params->itb,
- params->dtb, params->isa[0]);
- } else {
- /* thread_id 0 */
- thread = new Minor::MinorThread(this, 0, params->system,
- params->workload[0], params->itb, params->dtb, params->isa[0]);
- }
-
- threads.push_back(thread);
+ for (ThreadID i = 0; i < numThreads; i++) {
+ if (FullSystem) {
+ thread = new Minor::MinorThread(this, i, params->system,
+ params->itb, params->dtb, params->isa[i]);
+ thread->setStatus(ThreadContext::Halted);
+ } else {
+ thread = new Minor::MinorThread(this, i, params->system,
+ params->workload[i], params->itb, params->dtb,
+ params->isa[i]);
+ }
- thread->setStatus(ThreadContext::Halted);
+ threads.push_back(thread);
+ ThreadContext *tc = thread->getTC();
+ threadContexts.push_back(tc);
+ }
- ThreadContext *tc = thread->getTC();
if (params->checker) {
fatal("The Minor model doesn't support checking (yet)\n");
}
- threadContexts.push_back(tc);
-
Minor::MinorDynInst::init();
pipeline = new Minor::Pipeline(*this, *params);
@@ -137,9 +138,6 @@ MinorCPU::serializeThread(CheckpointOut &cp, ThreadID thread_id) const
void
MinorCPU::unserializeThread(CheckpointIn &cp, ThreadID thread_id)
{
- if (thread_id != 0)
- fatal("Trying to load more than one thread into a MinorCPU\n");
-
threads[thread_id]->unserialize(cp);
}
@@ -170,11 +168,11 @@ void
MinorCPU::wakeup(ThreadID tid)
{
DPRINTF(Drain, "[tid:%d] MinorCPU wakeup\n", tid);
+ assert(tid < numThreads);
- if (threads[tid]->status() == ThreadContext::Suspended)
+ if (threads[tid]->status() == ThreadContext::Suspended) {
threads[tid]->activate();
-
- DPRINTF(Drain,"Suspended Processor awoke\n");
+ }
}
void
@@ -187,13 +185,10 @@ MinorCPU::startup()
for (auto i = threads.begin(); i != threads.end(); i ++)
(*i)->startup();
- /* Workaround cases in SE mode where a thread is activated with an
- * incorrect PC that is updated after the call to activate. This
- * causes problems for Minor since it instantiates a virtual
- * branch instruction when activateContext() is called which ends
- * up pointing to an illegal address. */
- if (threads[0]->status() == ThreadContext::Active)
- activateContext(0);
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ threads[tid]->startup();
+ pipeline->wakeupFetch(tid);
+ }
}
DrainState
@@ -246,6 +241,7 @@ MinorCPU::drainResume()
for (ThreadID tid = 0; tid < numThreads; tid++)
wakeup(tid);
+
pipeline->drainResume();
}
@@ -278,7 +274,7 @@ MinorCPU::takeOverFrom(BaseCPU *old_cpu)
void
MinorCPU::activateContext(ThreadID thread_id)
{
- DPRINTF(MinorCPU, "ActivateContext thread: %d", thread_id);
+ DPRINTF(MinorCPU, "ActivateContext thread: %d\n", thread_id);
/* Do some cycle accounting. lastStopped is reset to stop the
* wakeup call on the pipeline from adding the quiesce period
@@ -289,7 +285,7 @@ MinorCPU::activateContext(ThreadID thread_id)
/* Wake up the thread, wakeup the pipeline tick */
threads[thread_id]->activate();
wakeupOnEvent(Minor::Pipeline::CPUStageId);
- pipeline->wakeupFetch();
+ pipeline->wakeupFetch(thread_id);
BaseCPU::activateContext(thread_id);
}
@@ -317,9 +313,6 @@ MinorCPU::wakeupOnEvent(unsigned int stage_id)
MinorCPU *
MinorCPUParams::create()
{
- numThreads = 1;
- if (!FullSystem && workload.size() != 1)
- panic("only one workload allowed");
return new MinorCPU(this);
}
diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh
index dad015e89..4e4762390 100644
--- a/src/cpu/minor/cpu.hh
+++ b/src/cpu/minor/cpu.hh
@@ -50,6 +50,7 @@
#include "cpu/minor/stats.hh"
#include "cpu/base.hh"
#include "cpu/simple_thread.hh"
+#include "enums/ThreadPolicy.hh"
#include "params/MinorCPU.hh"
namespace Minor
@@ -109,6 +110,8 @@ class MinorCPU : public BaseCPU
};
+ /** Thread Scheduling Policy (RoundRobin, Random, etc) */
+ Enums::ThreadPolicy threadPolicy;
protected:
/** Return a reference to the data port. */
MasterPort &getDataPort() override;
@@ -162,6 +165,26 @@ class MinorCPU : public BaseCPU
void activateContext(ThreadID thread_id) override;
void suspendContext(ThreadID thread_id) override;
+ /** Thread scheduling utility functions */
+ std::vector<ThreadID> roundRobinPriority(ThreadID priority)
+ {
+ std::vector<ThreadID> prio_list;
+ for (ThreadID i = 1; i <= numThreads; i++) {
+ prio_list.push_back((priority + i) % numThreads);
+ }
+ return prio_list;
+ }
+
+ std::vector<ThreadID> randomPriority()
+ {
+ std::vector<ThreadID> prio_list;
+ for (ThreadID i = 0; i < numThreads; i++) {
+ prio_list.push_back(i);
+ }
+ std::random_shuffle(prio_list.begin(), prio_list.end());
+ return prio_list;
+ }
+
/** Interface for stages to signal that they have become active after
* a callback or eventq event where the pipeline itself may have
* already been idled. The stage argument should be from the
diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc
index 94eee2be3..6243bca01 100644
--- a/src/cpu/minor/decode.cc
+++ b/src/cpu/minor/decode.cc
@@ -49,7 +49,7 @@ Decode::Decode(const std::string &name,
MinorCPUParams &params,
Latch<ForwardInstData>::Output inp_,
Latch<ForwardInstData>::Input out_,
- Reservable &next_stage_input_buffer) :
+ std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
Named(name),
cpu(cpu_),
inp(inp_),
@@ -57,11 +57,8 @@ Decode::Decode(const std::string &name,
nextStageReserve(next_stage_input_buffer),
outputWidth(params.executeInputWidth),
processMoreThanOneInput(params.decodeCycleInput),
- inputBuffer(name + ".inputBuffer", "insts", params.decodeInputBufferSize),
- inputIndex(0),
- inMacroop(false),
- execSeqNum(InstId::firstExecSeqNum),
- blocked(false)
+ decodeInfo(params.numThreads),
+ threadPriority(0)
{
if (outputWidth < 1)
fatal("%s: executeInputWidth must be >= 1 (%d)\n", name, outputWidth);
@@ -70,29 +67,37 @@ Decode::Decode(const std::string &name,
fatal("%s: decodeInputBufferSize must be >= 1 (%d)\n", name,
params.decodeInputBufferSize);
}
+
+ /* Per-thread input buffers */
+ for (ThreadID tid = 0; tid < params.numThreads; tid++) {
+ inputBuffer.push_back(
+ InputBuffer<ForwardInstData>(
+ name + ".inputBuffer" + std::to_string(tid), "insts",
+ params.decodeInputBufferSize));
+ }
}
const ForwardInstData *
-Decode::getInput()
+Decode::getInput(ThreadID tid)
{
/* Get insts from the inputBuffer to work with */
- if (!inputBuffer.empty()) {
- const ForwardInstData &head = inputBuffer.front();
+ if (!inputBuffer[tid].empty()) {
+ const ForwardInstData &head = inputBuffer[tid].front();
- return (head.isBubble() ? NULL : &(inputBuffer.front()));
+ return (head.isBubble() ? NULL : &(inputBuffer[tid].front()));
} else {
return NULL;
}
}
void
-Decode::popInput()
+Decode::popInput(ThreadID tid)
{
- if (!inputBuffer.empty())
- inputBuffer.pop();
+ if (!inputBuffer[tid].empty())
+ inputBuffer[tid].pop();
- inputIndex = 0;
- inMacroop = false;
+ decodeInfo[tid].inputIndex = 0;
+ decodeInfo[tid].inMacroop = false;
}
#if TRACING_ON
@@ -117,32 +122,37 @@ dynInstAddTracing(MinorDynInstPtr inst, StaticInstPtr static_inst,
void
Decode::evaluate()
{
- inputBuffer.setTail(*inp.outputWire);
+ /* Push input onto appropriate input buffer */
+ if (!inp.outputWire->isBubble())
+ inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire);
+
ForwardInstData &insts_out = *out.inputWire;
assert(insts_out.isBubble());
- blocked = false;
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
+ decodeInfo[tid].blocked = !nextStageReserve[tid].canReserve();
- if (!nextStageReserve.canReserve()) {
- blocked = true;
- } else {
- const ForwardInstData *insts_in = getInput();
+ ThreadID tid = getScheduledThread();
+
+ if (tid != InvalidThreadID) {
+ DecodeThreadInfo &decode_info = decodeInfo[tid];
+ const ForwardInstData *insts_in = getInput(tid);
unsigned int output_index = 0;
/* Pack instructions into the output while we can. This may involve
* using more than one input line */
while (insts_in &&
- inputIndex < insts_in->width() && /* Still more input */
+ decode_info.inputIndex < insts_in->width() && /* Still more input */
output_index < outputWidth /* Still more output to fill */)
{
- MinorDynInstPtr inst = insts_in->insts[inputIndex];
+ MinorDynInstPtr inst = insts_in->insts[decode_info.inputIndex];
if (inst->isBubble()) {
/* Skip */
- inputIndex++;
- inMacroop = false;
+ decode_info.inputIndex++;
+ decode_info.inMacroop = false;
} else {
StaticInstPtr static_inst = inst->staticInst;
/* Static inst of a macro-op above the output_inst */
@@ -153,25 +163,26 @@ Decode::evaluate()
DPRINTF(Decode, "Fault being passed: %d\n",
inst->fault->name());
- inputIndex++;
- inMacroop = false;
+ decode_info.inputIndex++;
+ decode_info.inMacroop = false;
} else if (static_inst->isMacroop()) {
/* Generate a new micro-op */
StaticInstPtr static_micro_inst;
/* Set up PC for the next micro-op emitted */
- if (!inMacroop) {
- microopPC = inst->pc;
- inMacroop = true;
+ if (!decode_info.inMacroop) {
+ decode_info.microopPC = inst->pc;
+ decode_info.inMacroop = true;
}
/* Get the micro-op static instruction from the
* static_inst. */
static_micro_inst =
- static_inst->fetchMicroop(microopPC.microPC());
+ static_inst->fetchMicroop(
+ decode_info.microopPC.microPC());
output_inst = new MinorDynInst(inst->id);
- output_inst->pc = microopPC;
+ output_inst->pc = decode_info.microopPC;
output_inst->staticInst = static_micro_inst;
output_inst->fault = NoFault;
@@ -185,45 +196,46 @@ Decode::evaluate()
DPRINTF(Decode, "Microop decomposition inputIndex:"
" %d output_index: %d lastMicroop: %s microopPC:"
" %d.%d inst: %d\n",
- inputIndex, output_index,
+ decode_info.inputIndex, output_index,
(static_micro_inst->isLastMicroop() ?
"true" : "false"),
- microopPC.instAddr(), microopPC.microPC(),
+ decode_info.microopPC.instAddr(),
+ decode_info.microopPC.microPC(),
*output_inst);
/* Acknowledge that the static_inst isn't mine, it's my
* parent macro-op's */
parent_static_inst = static_inst;
- static_micro_inst->advancePC(microopPC);
+ static_micro_inst->advancePC(decode_info.microopPC);
/* Step input if this is the last micro-op */
if (static_micro_inst->isLastMicroop()) {
- inputIndex++;
- inMacroop = false;
+ decode_info.inputIndex++;
+ decode_info.inMacroop = false;
}
} else {
/* Doesn't need decomposing, pass on instruction */
DPRINTF(Decode, "Passing on inst: %s inputIndex:"
" %d output_index: %d\n",
- *output_inst, inputIndex, output_index);
+ *output_inst, decode_info.inputIndex, output_index);
parent_static_inst = static_inst;
/* Step input */
- inputIndex++;
- inMacroop = false;
+ decode_info.inputIndex++;
+ decode_info.inMacroop = false;
}
/* Set execSeqNum of output_inst */
- output_inst->id.execSeqNum = execSeqNum;
+ output_inst->id.execSeqNum = decode_info.execSeqNum;
/* Add tracing */
#if TRACING_ON
dynInstAddTracing(output_inst, parent_static_inst, cpu);
#endif
/* Step to next sequence number */
- execSeqNum++;
+ decode_info.execSeqNum++;
/* Correctly size the output before writing */
if (output_index == 0) insts_out.resize(outputWidth);
@@ -233,17 +245,17 @@ Decode::evaluate()
}
/* Have we finished with the input? */
- if (inputIndex == insts_in->width()) {
+ if (decode_info.inputIndex == insts_in->width()) {
/* If we have just been producing micro-ops, we *must* have
* got to the end of that for inputIndex to be pushed past
* insts_in->width() */
- assert(!inMacroop);
- popInput();
+ assert(!decode_info.inMacroop);
+ popInput(tid);
insts_in = NULL;
if (processMoreThanOneInput) {
DPRINTF(Decode, "Wrapping\n");
- insts_in = getInput();
+ insts_in = getInput(tid);
}
}
}
@@ -261,22 +273,65 @@ Decode::evaluate()
if (!insts_out.isBubble()) {
/* Note activity of following buffer */
cpu.activityRecorder->activity();
- nextStageReserve.reserve();
+ insts_out.threadId = tid;
+ nextStageReserve[tid].reserve();
}
/* If we still have input to process and somewhere to put it,
* mark stage as active */
- if (getInput() && nextStageReserve.canReserve())
- cpu.activityRecorder->activateStage(Pipeline::DecodeStageId);
+ for (ThreadID i = 0; i < cpu.numThreads; i++)
+ {
+ if (getInput(i) && nextStageReserve[i].canReserve()) {
+ cpu.activityRecorder->activateStage(Pipeline::DecodeStageId);
+ break;
+ }
+ }
/* Make sure the input (if any left) is pushed */
- inputBuffer.pushTail();
+ if (!inp.outputWire->isBubble())
+ inputBuffer[inp.outputWire->threadId].pushTail();
+}
+
+inline ThreadID
+Decode::getScheduledThread()
+{
+ /* Select thread via policy. */
+ std::vector<ThreadID> priority_list;
+
+ switch (cpu.threadPolicy) {
+ case Enums::SingleThreaded:
+ priority_list.push_back(0);
+ break;
+ case Enums::RoundRobin:
+ priority_list = cpu.roundRobinPriority(threadPriority);
+ break;
+ case Enums::Random:
+ priority_list = cpu.randomPriority();
+ break;
+ default:
+ panic("Unknown fetch policy");
+ }
+
+ for (auto tid : priority_list) {
+ if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+ getInput(tid) && !decodeInfo[tid].blocked) {
+ threadPriority = tid;
+ return tid;
+ }
+ }
+
+ return InvalidThreadID;
}
bool
Decode::isDrained()
{
- return inputBuffer.empty() && (*inp.outputWire).isBubble();
+ for (const auto &buffer : inputBuffer) {
+ if (!buffer.empty())
+ return false;
+ }
+
+ return (*inp.outputWire).isBubble();
}
void
@@ -284,13 +339,13 @@ Decode::minorTrace() const
{
std::ostringstream data;
- if (blocked)
+ if (decodeInfo[0].blocked)
data << 'B';
else
(*out.inputWire).reportData(data);
MINORTRACE("insts=%s\n", data.str());
- inputBuffer.minorTrace();
+ inputBuffer[0].minorTrace();
}
}
diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh
index fcc18fd44..a4d29a59d 100644
--- a/src/cpu/minor/decode.hh
+++ b/src/cpu/minor/decode.hh
@@ -71,7 +71,7 @@ class Decode : public Named
Latch<ForwardInstData>::Input out;
/** Interface to reserve space in the next stage */
- Reservable &nextStageReserve;
+ std::vector<InputBuffer<ForwardInstData>> &nextStageReserve;
/** Width of output of this stage/input of next in instructions */
unsigned int outputWidth;
@@ -82,43 +82,68 @@ class Decode : public Named
public:
/* Public for Pipeline to be able to pass it to Fetch2 */
- InputBuffer<ForwardInstData> inputBuffer;
+ std::vector<InputBuffer<ForwardInstData>> inputBuffer;
protected:
/** Data members after this line are cycle-to-cycle state */
- /** Index into the inputBuffer's head marking the start of unhandled
- * instructions */
- unsigned int inputIndex;
+ struct DecodeThreadInfo {
- /** True when we're in the process of decomposing a micro-op and
- * microopPC will be valid. This is only the case when there isn't
- * sufficient space in Executes input buffer to take the whole of a
- * decomposed instruction and some of that instructions micro-ops must
- * be generated in a later cycle */
- bool inMacroop;
- TheISA::PCState microopPC;
+ /** Default Constructor */
+ DecodeThreadInfo() :
+ inputIndex(0),
+ inMacroop(false),
+ execSeqNum(InstId::firstExecSeqNum),
+ blocked(false)
+ { }
- /** Source of execSeqNums to number instructions. */
- InstSeqNum execSeqNum;
+ DecodeThreadInfo(const DecodeThreadInfo& other) :
+ inputIndex(other.inputIndex),
+ inMacroop(other.inMacroop),
+ execSeqNum(other.execSeqNum),
+ blocked(other.blocked)
+ { }
- /** Blocked indication for report */
- bool blocked;
+
+ /** Index into the inputBuffer's head marking the start of unhandled
+ * instructions */
+ unsigned int inputIndex;
+
+ /** True when we're in the process of decomposing a micro-op and
+ * microopPC will be valid. This is only the case when there isn't
+ * sufficient space in Executes input buffer to take the whole of a
+ * decomposed instruction and some of that instructions micro-ops must
+ * be generated in a later cycle */
+ bool inMacroop;
+ TheISA::PCState microopPC;
+
+ /** Source of execSeqNums to number instructions. */
+ InstSeqNum execSeqNum;
+
+ /** Blocked indication for report */
+ bool blocked;
+ };
+
+ std::vector<DecodeThreadInfo> decodeInfo;
+ ThreadID threadPriority;
protected:
/** Get a piece of data to work on, or 0 if there is no data. */
- const ForwardInstData *getInput();
+ const ForwardInstData *getInput(ThreadID tid);
/** Pop an element off the input buffer, if there are any */
- void popInput();
+ void popInput(ThreadID tid);
+ /** Use the current threading policy to determine the next thread to
+ * decode from. */
+ ThreadID getScheduledThread();
public:
Decode(const std::string &name,
MinorCPU &cpu_,
MinorCPUParams &params,
Latch<ForwardInstData>::Output inp_,
Latch<ForwardInstData>::Input out_,
- Reservable &next_stage_input_buffer);
+ std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer);
public:
/** Pass on input/buffer data to the output if you can */
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index ab08e6b4a..5d54f6913 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -52,6 +52,12 @@
namespace Minor
{
+const InstSeqNum InstId::firstStreamSeqNum;
+const InstSeqNum InstId::firstPredictionSeqNum;
+const InstSeqNum InstId::firstLineSeqNum;
+const InstSeqNum InstId::firstFetchSeqNum;
+const InstSeqNum InstId::firstExecSeqNum;
+
std::ostream &
operator <<(std::ostream &os, const InstId &id)
{
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 092ad5a2d..a5d646b6c 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -342,12 +342,17 @@ class ExecContext : public ::ExecContext
public:
// monitor/mwait funtions
- void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
- bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
+ void armMonitor(Addr address)
+ { getCpuPtr()->armMonitor(inst->id.threadId, address); }
+
+ bool mwait(PacketPtr pkt)
+ { return getCpuPtr()->mwait(inst->id.threadId, pkt); }
+
void mwaitAtomic(ThreadContext *tc)
- { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
+ { return getCpuPtr()->mwaitAtomic(inst->id.threadId, tc, thread.dtb); }
+
AddressMonitor *getAddrMonitor()
- { return getCpuPtr()->getCpuAddrMonitor(0); }
+ { return getCpuPtr()->getCpuAddrMonitor(inst->id.threadId); }
};
}
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 4298e1dcc..b13e0c020 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -86,15 +86,10 @@ Execute::Execute(const std::string &name_,
params.executeLSQTransfersQueueSize,
params.executeLSQStoreBufferSize,
params.executeLSQMaxStoreBufferStoresPerCycle),
- scoreboard(name_ + ".scoreboard"),
- inputBuffer(name_ + ".inputBuffer", "insts",
- params.executeInputBufferSize),
- inputIndex(0),
- lastCommitWasEndOfMacroop(true),
- instsBeingCommitted(params.executeCommitLimit),
- streamSeqNum(InstId::firstStreamSeqNum),
- lastPredictionSeqNum(InstId::firstPredictionSeqNum),
- drainState(NotDraining)
+ executeInfo(params.numThreads, ExecuteThreadInfo(params.executeCommitLimit)),
+ interruptPriority(0),
+ issuePriority(0),
+ commitPriority(0)
{
if (commitLimit < 1) {
fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
@@ -170,35 +165,50 @@ Execute::Execute(const std::string &name_,
}
}
- inFlightInsts = new Queue<QueuedInst,
- ReportTraitsAdaptor<QueuedInst> >(
- name_ + ".inFlightInsts", "insts", total_slots);
+ /* Per-thread structures */
+ for (ThreadID tid = 0; tid < params.numThreads; tid++) {
+ std::string tid_str = std::to_string(tid);
- inFUMemInsts = new Queue<QueuedInst,
- ReportTraitsAdaptor<QueuedInst> >(
- name_ + ".inFUMemInsts", "insts", total_slots);
+ /* Input Buffers */
+ inputBuffer.push_back(
+ InputBuffer<ForwardInstData>(
+ name_ + ".inputBuffer" + tid_str, "insts",
+ params.executeInputBufferSize));
+
+ /* Scoreboards */
+ scoreboard.push_back(Scoreboard(name_ + ".scoreboard" + tid_str));
+
+ /* In-flight instruction records */
+ executeInfo[tid].inFlightInsts = new Queue<QueuedInst,
+ ReportTraitsAdaptor<QueuedInst> >(
+ name_ + ".inFlightInsts" + tid_str, "insts", total_slots);
+
+ executeInfo[tid].inFUMemInsts = new Queue<QueuedInst,
+ ReportTraitsAdaptor<QueuedInst> >(
+ name_ + ".inFUMemInsts" + tid_str, "insts", total_slots);
+ }
}
const ForwardInstData *
-Execute::getInput()
+Execute::getInput(ThreadID tid)
{
/* Get a line from the inputBuffer to work with */
- if (!inputBuffer.empty()) {
- const ForwardInstData &head = inputBuffer.front();
+ if (!inputBuffer[tid].empty()) {
+ const ForwardInstData &head = inputBuffer[tid].front();
- return (head.isBubble() ? NULL : &(inputBuffer.front()));
+ return (head.isBubble() ? NULL : &(inputBuffer[tid].front()));
} else {
return NULL;
}
}
void
-Execute::popInput()
+Execute::popInput(ThreadID tid)
{
- if (!inputBuffer.empty())
- inputBuffer.pop();
+ if (!inputBuffer[tid].empty())
+ inputBuffer[tid].pop();
- inputIndex = 0;
+ executeInfo[tid].inputIndex = 0;
}
void
@@ -276,11 +286,12 @@ Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch)
reason = BranchData::NoBranch;
}
- updateBranchData(reason, inst, target, branch);
+ updateBranchData(inst->id.threadId, reason, inst, target, branch);
}
void
Execute::updateBranchData(
+ ThreadID tid,
BranchData::Reason reason,
MinorDynInstPtr inst, const TheISA::PCState &target,
BranchData &branch)
@@ -288,14 +299,15 @@ Execute::updateBranchData(
if (reason != BranchData::NoBranch) {
/* Bump up the stream sequence number on a real branch*/
if (BranchData::isStreamChange(reason))
- streamSeqNum++;
+ executeInfo[tid].streamSeqNum++;
/* Branches (even mis-predictions) don't change the predictionSeqNum,
* just the streamSeqNum */
- branch = BranchData(reason, streamSeqNum,
+ branch = BranchData(reason, tid,
+ executeInfo[tid].streamSeqNum,
/* Maintaining predictionSeqNum if there's no inst is just a
* courtesy and looks better on minorview */
- (inst->isBubble() ? lastPredictionSeqNum
+ (inst->isBubble() ? executeInfo[tid].lastPredictionSeqNum
: inst->id.predictionSeqNum),
target, inst);
@@ -419,8 +431,9 @@ Execute::takeInterrupt(ThreadID thread_id, BranchData &branch)
/* Assume that an interrupt *must* cause a branch. Assert this? */
- updateBranchData(BranchData::Interrupt, MinorDynInst::bubble(),
- cpu.getContext(thread_id)->pcState(), branch);
+ updateBranchData(thread_id, BranchData::Interrupt,
+ MinorDynInst::bubble(), cpu.getContext(thread_id)->pcState(),
+ branch);
}
return interrupt != NoFault;
@@ -506,9 +519,10 @@ cyclicIndexDec(unsigned int index, unsigned int cycle_size)
}
unsigned int
-Execute::issue(bool only_issue_microops)
+Execute::issue(ThreadID thread_id)
{
- const ForwardInstData *insts_in = getInput();
+ const ForwardInstData *insts_in = getInput(thread_id);
+ ExecuteThreadInfo &thread = executeInfo[thread_id];
/* Early termination if we have no instructions */
if (!insts_in)
@@ -534,8 +548,7 @@ Execute::issue(bool only_issue_microops)
unsigned num_insts_discarded = 0;
do {
- MinorDynInstPtr inst = insts_in->insts[inputIndex];
- ThreadID thread_id = inst->id.threadId;
+ MinorDynInstPtr inst = insts_in->insts[thread.inputIndex];
Fault fault = inst->fault;
bool discarded = false;
bool issued_mem_ref = false;
@@ -550,21 +563,12 @@ Execute::issue(bool only_issue_microops)
" thread\n", *inst);
issued = false;
- } else if (inst->id.streamSeqNum != streamSeqNum) {
+ } else if (inst->id.streamSeqNum != thread.streamSeqNum) {
DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
" state was unexpected, expected: %d\n",
- *inst, streamSeqNum);
+ *inst, thread.streamSeqNum);
issued = true;
discarded = true;
- } else if (fault == NoFault && only_issue_microops &&
- /* Is this anything other than a non-first microop */
- (!inst->staticInst->isMicroop() ||
- !inst->staticInst->isFirstMicroop()))
- {
- DPRINTF(MinorExecute, "Not issuing new non-microop inst: %s\n",
- *inst);
-
- issued = false;
} else {
/* Try and issue an instruction into an FU, assume we didn't and
* fix that in the loop */
@@ -598,9 +602,10 @@ Execute::issue(bool only_issue_microops)
/* Mark the destinations for this instruction as
* busy */
- scoreboard.markupInstDests(inst, cpu.curCycle() +
+ scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
Cycles(0), cpu.getContext(thread_id), false);
+ DPRINTF(MinorExecute, "Issuing %s to %d\n", inst->id, noCostFUIndex);
inst->fuIndex = noCostFUIndex;
inst->extraCommitDelay = Cycles(0);
inst->extraCommitDelayExpr = NULL;
@@ -608,7 +613,7 @@ Execute::issue(bool only_issue_microops)
/* Push the instruction onto the inFlight queue so
* it can be committed in order */
QueuedInst fu_inst(inst);
- inFlightInsts->push(fu_inst);
+ thread.inFlightInsts->push(fu_inst);
issued = true;
@@ -644,8 +649,8 @@ Execute::issue(bool only_issue_microops)
DPRINTF(MinorExecute, "Can't issue inst: %s as extra"
" decoding is suppressing it\n",
*inst);
- } else if (!scoreboard.canInstIssue(inst, src_latencies,
- cant_forward_from_fu_indices,
+ } else if (!scoreboard[thread_id].canInstIssue(inst,
+ src_latencies, cant_forward_from_fu_indices,
cpu.curCycle(), cpu.getContext(thread_id)))
{
DPRINTF(MinorExecute, "Can't issue inst: %s yet\n",
@@ -687,20 +692,20 @@ Execute::issue(bool only_issue_microops)
* early */
if (allowEarlyMemIssue) {
inst->instToWaitFor =
- scoreboard.execSeqNumToWaitFor(inst,
+ scoreboard[thread_id].execSeqNumToWaitFor(inst,
cpu.getContext(thread_id));
- if (lsq.getLastMemBarrier() >
+ if (lsq.getLastMemBarrier(thread_id) >
inst->instToWaitFor)
{
DPRINTF(MinorExecute, "A barrier will"
" cause a delay in mem ref issue of"
" inst: %s until after inst"
" %d(exec)\n", *inst,
- lsq.getLastMemBarrier());
+ lsq.getLastMemBarrier(thread_id));
inst->instToWaitFor =
- lsq.getLastMemBarrier();
+ lsq.getLastMemBarrier(thread_id);
} else {
DPRINTF(MinorExecute, "Memory ref inst:"
" %s must wait for inst %d(exec)"
@@ -714,7 +719,7 @@ Execute::issue(bool only_issue_microops)
* queue to ensure in-order issue to the LSQ */
DPRINTF(MinorExecute, "Pushing mem inst: %s\n",
*inst);
- inFUMemInsts->push(fu_inst);
+ thread.inFUMemInsts->push(fu_inst);
}
/* Issue to FU */
@@ -725,7 +730,7 @@ Execute::issue(bool only_issue_microops)
/* Mark the destinations for this instruction as
* busy */
- scoreboard.markupInstDests(inst, cpu.curCycle() +
+ scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
fu->description.opLat +
extra_dest_retire_lat +
extra_assumed_lat,
@@ -734,7 +739,7 @@ Execute::issue(bool only_issue_microops)
/* Push the instruction onto the inFlight queue so
* it can be committed in order */
- inFlightInsts->push(fu_inst);
+ thread.inFlightInsts->push(fu_inst);
issued = true;
}
@@ -777,24 +782,24 @@ Execute::issue(bool only_issue_microops)
DPRINTF(MinorExecute, "Reached inst issue limit\n");
}
- inputIndex++;
+ thread.inputIndex++;
DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n",
- inputIndex);
+ thread.inputIndex);
}
/* Got to the end of a line */
- if (inputIndex == insts_in->width()) {
- popInput();
+ if (thread.inputIndex == insts_in->width()) {
+ popInput(thread_id);
/* Set insts_in to null to force us to leave the surrounding
* loop */
insts_in = NULL;
if (processMoreThanOneInput) {
DPRINTF(MinorExecute, "Wrapping\n");
- insts_in = getInput();
+ insts_in = getInput(thread_id);
}
}
- } while (insts_in && inputIndex < insts_in->width() &&
+ } while (insts_in && thread.inputIndex < insts_in->width() &&
/* We still have instructions */
fu_index != numFuncUnits && /* Not visited all FUs */
issued && /* We've not yet failed to issue an instruction */
@@ -805,9 +810,9 @@ Execute::issue(bool only_issue_microops)
}
bool
-Execute::tryPCEvents()
+Execute::tryPCEvents(ThreadID thread_id)
{
- ThreadContext *thread = cpu.getContext(0);
+ ThreadContext *thread = cpu.getContext(thread_id);
unsigned int num_pc_event_checks = 0;
/* Handle PC events on instructions */
@@ -934,6 +939,11 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
" there isn't space in the store buffer\n", *inst);
completed_inst = false;
+ } else if (inst->isInst() && inst->staticInst->isQuiesce()
+ && !branch.isBubble()){
+ /* This instruction can suspend, need to be able to communicate
+ * backwards, so no other branches may evaluate this cycle*/
+ completed_inst = false;
} else {
ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);
@@ -962,7 +972,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
/* Keep a copy of this instruction's predictionSeqNum just in case
* we need to issue a branch without an instruction (such as an
* interrupt) */
- lastPredictionSeqNum = inst->id.predictionSeqNum;
+ executeInfo[thread_id].lastPredictionSeqNum = inst->id.predictionSeqNum;
/* Check to see if this instruction suspended the current thread. */
if (!inst->isFault() &&
@@ -971,17 +981,17 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
!isInterrupted(thread_id)) /* Don't suspend if we have
interrupts */
{
- TheISA::PCState resume_pc = cpu.getContext(0)->pcState();
+ TheISA::PCState resume_pc = cpu.getContext(thread_id)->pcState();
assert(resume_pc.microPC() == 0);
DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
- " inst: %s\n", inst->id.threadId, *inst);
+ " inst: %s\n", thread_id, *inst);
cpu.stats.numFetchSuspends++;
- updateBranchData(BranchData::SuspendThread, inst, resume_pc,
- branch);
+ updateBranchData(thread_id, BranchData::SuspendThread, inst,
+ resume_pc, branch);
}
}
@@ -989,10 +999,12 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
}
void
-Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
+Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
+ BranchData &branch)
{
Fault fault = NoFault;
Cycles now = cpu.curCycle();
+ ExecuteThreadInfo &ex_info = executeInfo[thread_id];
/**
* Try and execute as many instructions from the end of FU pipelines as
@@ -1030,13 +1042,13 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
* memCommitLimit */
unsigned int num_mem_refs_committed = 0;
- if (only_commit_microops && !inFlightInsts->empty()) {
+ if (only_commit_microops && !ex_info.inFlightInsts->empty()) {
DPRINTF(MinorInterrupt, "Only commit microops %s %d\n",
- *(inFlightInsts->front().inst),
- lastCommitWasEndOfMacroop);
+ *(ex_info.inFlightInsts->front().inst),
+ ex_info.lastCommitWasEndOfMacroop);
}
- while (!inFlightInsts->empty() && /* Some more instructions to process */
+ while (!ex_info.inFlightInsts->empty() && /* Some more instructions to process */
!branch.isStreamChange() && /* No real branch */
fault == NoFault && /* No faults */
completed_inst && /* Still finding instructions to execute */
@@ -1046,10 +1058,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
if (only_commit_microops) {
DPRINTF(MinorInterrupt, "Committing tail of insts before"
" interrupt: %s\n",
- *(inFlightInsts->front().inst));
+ *(ex_info.inFlightInsts->front().inst));
}
- QueuedInst *head_inflight_inst = &(inFlightInsts->front());
+ QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());
InstSeqNum head_exec_seq_num =
head_inflight_inst->inst->id.execSeqNum;
@@ -1071,8 +1083,8 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
/* If we're just completing a macroop before an interrupt or drain,
* can we stil commit another microop (rather than a memory response)
* without crosing into the next full instruction? */
- bool can_commit_insts = !inFlightInsts->empty() &&
- !(only_commit_microops && lastCommitWasEndOfMacroop);
+ bool can_commit_insts = !ex_info.inFlightInsts->empty() &&
+ !(only_commit_microops && ex_info.lastCommitWasEndOfMacroop);
/* Can we find a mem response for this inst */
LSQ::LSQRequestPtr mem_response =
@@ -1082,18 +1094,18 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
can_commit_insts);
/* Test for PC events after every instruction */
- if (isInbetweenInsts() && tryPCEvents()) {
- ThreadContext *thread = cpu.getContext(0);
+ if (isInbetweenInsts(thread_id) && tryPCEvents(thread_id)) {
+ ThreadContext *thread = cpu.getContext(thread_id);
/* Branch as there was a change in PC */
- updateBranchData(BranchData::UnpredictedBranch,
+ updateBranchData(thread_id, BranchData::UnpredictedBranch,
MinorDynInst::bubble(), thread->pcState(), branch);
} else if (mem_response &&
num_mem_refs_committed < memoryCommitLimit)
{
/* Try to commit from the memory responses next */
- discard_inst = inst->id.streamSeqNum != streamSeqNum ||
- discard;
+ discard_inst = inst->id.streamSeqNum !=
+ ex_info.streamSeqNum || discard;
DPRINTF(MinorExecute, "Trying to commit mem response: %s\n",
*inst);
@@ -1102,7 +1114,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
if (discard_inst) {
DPRINTF(MinorExecute, "Discarding mem inst: %s as its"
" stream state was unexpected, expected: %d\n",
- *inst, streamSeqNum);
+ *inst, ex_info.streamSeqNum);
lsq.popResponse(mem_response);
} else {
@@ -1128,11 +1140,11 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
* For any other case, leave it to the normal instruction
* issue below to handle them.
*/
- if (!inFUMemInsts->empty() && lsq.canRequest()) {
+ if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
DPRINTF(MinorExecute, "Trying to commit from mem FUs\n");
const MinorDynInstPtr head_mem_ref_inst =
- inFUMemInsts->front().inst;
+ ex_info.inFUMemInsts->front().inst;
FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
const MinorDynInstPtr &fu_inst = fu->front().inst;
@@ -1141,7 +1153,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
if (!fu_inst->isBubble() &&
!fu_inst->inLSQ &&
fu_inst->canEarlyIssue &&
- streamSeqNum == fu_inst->id.streamSeqNum &&
+ ex_info.streamSeqNum == fu_inst->id.streamSeqNum &&
head_exec_seq_num > fu_inst->instToWaitFor)
{
DPRINTF(MinorExecute, "Issuing mem ref early"
@@ -1184,7 +1196,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
* actually at the end of its pipeline
* Future instruction: handled above and only for
* mem refs on their way to the LSQ */
- } else /* if (fu_inst_seq_num == head_exec_seq_num) */ {
+ } else if (fu_inst.inst->id == inst->id) {
/* All instructions can be committed if they have the
* right execSeqNum and there are no in-flight
* mem insts before us */
@@ -1194,8 +1206,8 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
}
if (try_to_commit) {
- discard_inst = inst->id.streamSeqNum != streamSeqNum ||
- discard;
+ discard_inst = inst->id.streamSeqNum !=
+ ex_info.streamSeqNum || discard;
/* Is this instruction discardable as its streamSeqNum
* doesn't match? */
@@ -1209,8 +1221,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
DPRINTF(MinorExecute, "Evaluating expression for"
" extra commit delay inst: %s\n", *inst);
- ThreadContext *thread =
- cpu.getContext(inst->id.threadId);
+ ThreadContext *thread = cpu.getContext(thread_id);
TimingExprEvalContext context(inst->staticInst,
thread, NULL);
@@ -1241,9 +1252,9 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
/* @todo Think about making lastMemBarrier be
* MAX_UINT_64 to avoid using 0 as a marker value */
if (!inst->isFault() && inst->isMemRef() &&
- lsq.getLastMemBarrier() <
+ lsq.getLastMemBarrier(thread_id) <
inst->id.execSeqNum &&
- lsq.getLastMemBarrier() != 0)
+ lsq.getLastMemBarrier(thread_id) != 0)
{
DPRINTF(MinorExecute, "Not committing inst: %s yet"
" as there are incomplete barriers in flight\n",
@@ -1269,8 +1280,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
* instruction wasn't the inFlightInsts head
* but had already been committed, it would have
* unstalled the pipeline before here */
- if (inst->fuIndex != noCostFUIndex)
+ if (inst->fuIndex != noCostFUIndex) {
+ DPRINTF(MinorExecute, "Unstalling %d for inst %s\n", inst->fuIndex, inst->id);
funcUnits[inst->fuIndex]->stalled = false;
+ }
}
}
} else {
@@ -1286,7 +1299,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
if (discard_inst) {
DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
" state was unexpected, expected: %d\n",
- *inst, streamSeqNum);
+ *inst, ex_info.streamSeqNum);
if (fault == NoFault)
cpu.stats.numDiscardedOps++;
@@ -1303,10 +1316,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
if (completed_inst && inst->isMemRef()) {
/* The MemRef could have been discarded from the FU or the memory
* queue, so just check an FU instruction */
- if (!inFUMemInsts->empty() &&
- inFUMemInsts->front().inst == inst)
+ if (!ex_info.inFUMemInsts->empty() &&
+ ex_info.inFUMemInsts->front().inst == inst)
{
- inFUMemInsts->pop();
+ ex_info.inFUMemInsts->pop();
}
}
@@ -1315,16 +1328,16 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
DPRINTF(MinorExecute, "Completed inst: %s\n", *inst);
/* Got to the end of a full instruction? */
- lastCommitWasEndOfMacroop = inst->isFault() ||
+ ex_info.lastCommitWasEndOfMacroop = inst->isFault() ||
inst->isLastOpInInst();
/* lastPredictionSeqNum is kept as a convenience to prevent its
* value from changing too much on the minorview display */
- lastPredictionSeqNum = inst->id.predictionSeqNum;
+ ex_info.lastPredictionSeqNum = inst->id.predictionSeqNum;
/* Finished with the inst, remove it from the inst queue and
* clear its dependencies */
- inFlightInsts->pop();
+ ex_info.inFlightInsts->pop();
/* Complete barriers in the LSQ/move to store buffer */
if (inst->isInst() && inst->staticInst->isMemBarrier()) {
@@ -1333,7 +1346,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
lsq.completeMemBarrierInst(inst, committed_inst);
}
- scoreboard.clearInstDests(inst, inst->isMemRef());
+ scoreboard[thread_id].clearInstDests(inst, inst->isMemRef());
}
/* Handle per-cycle instruction counting */
@@ -1343,7 +1356,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
/* Don't show no cost instructions as having taken a commit
* slot */
if (DTRACE(MinorTrace) && !is_no_cost_inst)
- instsBeingCommitted.insts[num_insts_committed] = inst;
+ ex_info.instsBeingCommitted.insts[num_insts_committed] = inst;
if (!is_no_cost_inst)
num_insts_committed++;
@@ -1369,124 +1382,112 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
}
bool
-Execute::isInbetweenInsts() const
+Execute::isInbetweenInsts(ThreadID thread_id) const
{
- return lastCommitWasEndOfMacroop &&
+ return executeInfo[thread_id].lastCommitWasEndOfMacroop &&
!lsq.accessesInFlight();
}
void
Execute::evaluate()
{
- inputBuffer.setTail(*inp.outputWire);
+ if (!inp.outputWire->isBubble())
+ inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire);
+
BranchData &branch = *out.inputWire;
- const ForwardInstData *insts_in = getInput();
+ unsigned int num_issued = 0;
/* Do all the cycle-wise activities for dcachePort here to potentially
* free up input spaces in the LSQ's requests queue */
lsq.step();
- /* Has an interrupt been signalled? This may not be acted on
- * straighaway so this is different from took_interrupt below */
+ /* Check interrupts first. Will halt commit if interrupt found */
bool interrupted = false;
- /* If there was an interrupt signalled, was it acted on now? */
- bool took_interrupt = false;
-
- if (cpu.getInterruptController(0)) {
- /* This is here because it seems that after drainResume the
- * interrupt controller isn't always set */
- interrupted = drainState == NotDraining && isInterrupted(0);
- } else {
- DPRINTF(MinorInterrupt, "No interrupt controller\n");
- }
+ ThreadID interrupt_tid = checkInterrupts(branch, interrupted);
- unsigned int num_issued = 0;
-
- if (DTRACE(MinorTrace)) {
- /* Empty the instsBeingCommitted for MinorTrace */
- instsBeingCommitted.bubbleFill();
- }
-
- /* THREAD threadId on isInterrupted */
- /* Act on interrupts */
- if (interrupted && isInbetweenInsts()) {
- took_interrupt = takeInterrupt(0, branch);
- /* Clear interrupted if no interrupt was actually waiting */
- interrupted = took_interrupt;
- }
-
- if (took_interrupt) {
- /* Do no commit/issue this cycle */
+ if (interrupt_tid != InvalidThreadID) {
+ /* Signalling an interrupt this cycle, not issuing/committing from
+ * any other threads */
} else if (!branch.isBubble()) {
/* It's important that this is here to carry Fetch1 wakeups to Fetch1
* without overwriting them */
DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old"
" branch to complete\n");
} else {
- if (interrupted) {
- if (inFlightInsts->empty()) {
- DPRINTF(MinorInterrupt, "Waiting but no insts\n");
+ ThreadID commit_tid = getCommittingThread();
+
+ if (commit_tid != InvalidThreadID) {
+ ExecuteThreadInfo& commit_info = executeInfo[commit_tid];
+
+ DPRINTF(MinorExecute, "Attempting to commit [tid:%d]\n",
+ commit_tid);
+ /* commit can set stalled flags observable to issue and so *must* be
+ * called first */
+ if (commit_info.drainState != NotDraining) {
+ if (commit_info.drainState == DrainCurrentInst) {
+ /* Commit only micro-ops, don't kill anything else */
+ commit(commit_tid, true, false, branch);
+
+ if (isInbetweenInsts(commit_tid))
+ setDrainState(commit_tid, DrainHaltFetch);
+
+ /* Discard any generated branch */
+ branch = BranchData::bubble();
+ } else if (commit_info.drainState == DrainAllInsts) {
+ /* Kill all instructions */
+ while (getInput(commit_tid))
+ popInput(commit_tid);
+ commit(commit_tid, false, true, branch);
+ }
} else {
- DPRINTF(MinorInterrupt, "Waiting for end of inst before"
- " signalling interrupt\n");
+ /* Commit micro-ops only if interrupted. Otherwise, commit
+ * anything you like */
+ DPRINTF(MinorExecute, "Committing micro-ops for interrupt[tid:%d]\n",
+ commit_tid);
+ bool only_commit_microops = interrupted &&
+ hasInterrupt(commit_tid);
+ commit(commit_tid, only_commit_microops, false, branch);
}
- }
- /* commit can set stalled flags observable to issue and so *must* be
- * called first */
- if (drainState != NotDraining) {
- if (drainState == DrainCurrentInst) {
- /* Commit only micro-ops, don't kill anything else */
- commit(true, false, branch);
-
- if (isInbetweenInsts())
- setDrainState(DrainHaltFetch);
-
- /* Discard any generated branch */
- branch = BranchData::bubble();
- } else if (drainState == DrainAllInsts) {
- /* Kill all instructions */
- while (getInput())
- popInput();
- commit(false, true, branch);
+ /* Halt fetch, but don't do it until we have the current instruction in
+ * the bag */
+ if (commit_info.drainState == DrainHaltFetch) {
+ updateBranchData(commit_tid, BranchData::HaltFetch,
+ MinorDynInst::bubble(), TheISA::PCState(0), branch);
+
+ cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+ setDrainState(commit_tid, DrainAllInsts);
}
- } else {
- /* Commit micro-ops only if interrupted. Otherwise, commit
- * anything you like */
- commit(interrupted, false, branch);
}
-
+ ThreadID issue_tid = getIssuingThread();
/* This will issue merrily even when interrupted in the sure and
* certain knowledge that the interrupt with change the stream */
- if (insts_in)
- num_issued = issue(false);
- }
-
- /* Halt fetch, but don't do it until we have the current instruction in
- * the bag */
- if (drainState == DrainHaltFetch) {
- updateBranchData(BranchData::HaltFetch, MinorDynInst::bubble(),
- TheISA::PCState(0), branch);
+ if (issue_tid != InvalidThreadID) {
+ DPRINTF(MinorExecute, "Attempting to issue [tid:%d]\n",
+ issue_tid);
+ num_issued = issue(issue_tid);
+ }
- cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
- setDrainState(DrainAllInsts);
}
- MinorDynInstPtr next_issuable_inst = NULL;
+ /* Run logic to step functional units + decide if we are active on the next
+ * clock cycle */
+ std::vector<MinorDynInstPtr> next_issuable_insts;
bool can_issue_next = false;
- /* Find the next issuable instruction and see if it can be issued */
- if (getInput()) {
- MinorDynInstPtr inst = getInput()->insts[inputIndex];
-
- if (inst->isFault()) {
- can_issue_next = true;
- } else if (!inst->isBubble()) {
- if (cpu.getContext(inst->id.threadId)->status() !=
- ThreadContext::Suspended)
- {
- next_issuable_inst = inst;
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ /* Find the next issuable instruction for each thread and see if it can
+ be issued */
+ if (getInput(tid)) {
+ unsigned int input_index = executeInfo[tid].inputIndex;
+ MinorDynInstPtr inst = getInput(tid)->insts[input_index];
+ if (inst->isFault()) {
+ can_issue_next = true;
+ } else if (!inst->isBubble()) {
+ if (cpu.getContext(tid)->status() != ThreadContext::Suspended) {
+ next_issuable_insts.push_back(inst);
+ }
}
}
}
@@ -1494,53 +1495,56 @@ Execute::evaluate()
bool becoming_stalled = true;
/* Advance the pipelines and note whether they still need to be
- * advanced */
+ * advanced */
for (unsigned int i = 0; i < numFuncUnits; i++) {
FUPipeline *fu = funcUnits[i];
-
fu->advance();
- /* If we need to go again, the pipeline will have been left or set
- * to be unstalled */
- if (fu->occupancy != 0 && !fu->stalled)
+ /* If we need to tick again, the pipeline will have been left or set
+ * to be unstalled */
+ if (fu->occupancy !=0 && !fu->stalled)
becoming_stalled = false;
- /* Could we possibly issue the next instruction? This is quite
- * an expensive test */
- if (next_issuable_inst && !fu->stalled &&
- scoreboard.canInstIssue(next_issuable_inst,
- NULL, NULL, cpu.curCycle() + Cycles(1),
- cpu.getContext(next_issuable_inst->id.threadId)) &&
- fu->provides(next_issuable_inst->staticInst->opClass()))
- {
- can_issue_next = true;
+ /* Could we possibly issue the next instruction from any thread?
+ * This is quite an expensive test and is only used to determine
+ * if the CPU should remain active, only run it if we aren't sure
+ * we are active next cycle yet */
+ for (auto inst : next_issuable_insts) {
+ if (!fu->stalled && fu->provides(inst->staticInst->opClass()) &&
+ scoreboard[inst->id.threadId].canInstIssue(inst,
+ NULL, NULL, cpu.curCycle() + Cycles(1),
+ cpu.getContext(inst->id.threadId))) {
+ can_issue_next = true;
+ break;
+ }
}
}
bool head_inst_might_commit = false;
/* Could the head in flight insts be committed */
- if (!inFlightInsts->empty()) {
- const QueuedInst &head_inst = inFlightInsts->front();
+ for (auto const &info : executeInfo) {
+ if (!info.inFlightInsts->empty()) {
+ const QueuedInst &head_inst = info.inFlightInsts->front();
- if (head_inst.inst->isNoCostInst()) {
- head_inst_might_commit = true;
- } else {
- FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
-
- /* Head inst is commitable */
- if ((fu->stalled &&
- fu->front().inst->id == head_inst.inst->id) ||
- lsq.findResponse(head_inst.inst))
- {
+ if (head_inst.inst->isNoCostInst()) {
head_inst_might_commit = true;
+ } else {
+ FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
+ if ((fu->stalled &&
+ fu->front().inst->id == head_inst.inst->id) ||
+ lsq.findResponse(head_inst.inst))
+ {
+ head_inst_might_commit = true;
+ break;
+ }
}
}
}
DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n",
(num_issued != 0 ? " (issued some insts)" : ""),
- (becoming_stalled ? " (becoming stalled)" : "(not becoming stalled)"),
+ (becoming_stalled ? "(becoming stalled)" : "(not becoming stalled)"),
(can_issue_next ? " (can issued next inst)" : ""),
(head_inst_might_commit ? "(head inst might commit)" : ""),
(lsq.needsToTick() ? " (LSQ needs to tick)" : ""),
@@ -1568,36 +1572,54 @@ Execute::evaluate()
cpu.activityRecorder->activity();
/* Make sure the input (if any left) is pushed */
- inputBuffer.pushTail();
+ if (!inp.outputWire->isBubble())
+ inputBuffer[inp.outputWire->threadId].pushTail();
}
-void
-Execute::wakeupFetch(BranchData::Reason reason)
+ThreadID
+Execute::checkInterrupts(BranchData& branch, bool& interrupted)
{
- BranchData branch;
- assert(branch.isBubble());
-
- /* THREAD thread id */
- ThreadContext *thread = cpu.getContext(0);
-
- /* Force a branch to the current PC (which should be the next inst.) to
- * wake up Fetch1 */
- if (!branch.isStreamChange() /* No real branch already happened */) {
- DPRINTF(MinorInterrupt, "Waking up Fetch (via Execute) by issuing"
- " a branch: %s\n", thread->pcState());
+ ThreadID tid = interruptPriority;
+ /* Evaluate interrupts in round-robin based upon service */
+ do {
+ /* Has an interrupt been signalled? This may not be acted on
+ * straighaway so this is different from took_interrupt */
+ bool thread_interrupted = false;
+
+ if (FullSystem && cpu.getInterruptController(tid)) {
+ /* This is here because it seems that after drainResume the
+ * interrupt controller isn't always set */
+ thread_interrupted = executeInfo[tid].drainState == NotDraining &&
+ isInterrupted(tid);
+ interrupted = interrupted || thread_interrupted;
+ } else {
+ DPRINTF(MinorInterrupt, "No interrupt controller\n");
+ }
+ DPRINTF(MinorInterrupt, "[tid:%d] thread_interrupted?=%d isInbetweenInsts?=%d\n",
+ tid, thread_interrupted, isInbetweenInsts(tid));
+ /* Act on interrupts */
+ if (thread_interrupted && isInbetweenInsts(tid)) {
+ if (takeInterrupt(tid, branch)) {
+ interruptPriority = tid;
+ return tid;
+ }
+ } else {
+ tid = (tid + 1) % cpu.numThreads;
+ }
+ } while (tid != interruptPriority);
- assert(thread->pcState().microPC() == 0);
+ return InvalidThreadID;
+}
- updateBranchData(reason,
- MinorDynInst::bubble(), thread->pcState(), branch);
- } else {
- DPRINTF(MinorInterrupt, "Already branching, no need for wakeup\n");
+bool
+Execute::hasInterrupt(ThreadID thread_id)
+{
+ if (FullSystem && cpu.getInterruptController(thread_id)) {
+ return executeInfo[thread_id].drainState == NotDraining &&
+ isInterrupted(thread_id);
}
- *out.inputWire = branch;
-
- /* Make sure we get ticked */
- cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+ return false;
}
void
@@ -1606,10 +1628,10 @@ Execute::minorTrace() const
std::ostringstream insts;
std::ostringstream stalled;
- instsBeingCommitted.reportData(insts);
+ executeInfo[0].instsBeingCommitted.reportData(insts);
lsq.minorTrace();
- inputBuffer.minorTrace();
- scoreboard.minorTrace();
+ inputBuffer[0].minorTrace();
+ scoreboard[0].minorTrace();
/* Report functional unit stalling in one string */
unsigned int i = 0;
@@ -1623,14 +1645,110 @@ Execute::minorTrace() const
MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d"
" stalled=%s drainState=%d isInbetweenInsts=%d\n",
- insts.str(), inputIndex, streamSeqNum, stalled.str(), drainState,
- isInbetweenInsts());
+ insts.str(), executeInfo[0].inputIndex, executeInfo[0].streamSeqNum,
+ stalled.str(), executeInfo[0].drainState, isInbetweenInsts(0));
std::for_each(funcUnits.begin(), funcUnits.end(),
std::mem_fun(&FUPipeline::minorTrace));
- inFlightInsts->minorTrace();
- inFUMemInsts->minorTrace();
+ executeInfo[0].inFlightInsts->minorTrace();
+ executeInfo[0].inFUMemInsts->minorTrace();
+}
+
+inline ThreadID
+Execute::getCommittingThread()
+{
+ std::vector<ThreadID> priority_list;
+
+ switch (cpu.threadPolicy) {
+ case Enums::SingleThreaded:
+ return 0;
+ case Enums::RoundRobin:
+ priority_list = cpu.roundRobinPriority(commitPriority);
+ break;
+ case Enums::Random:
+ priority_list = cpu.randomPriority();
+ break;
+ default:
+ panic("Invalid thread policy");
+ }
+
+ for (auto tid : priority_list) {
+ ExecuteThreadInfo &ex_info = executeInfo[tid];
+ bool can_commit_insts = !ex_info.inFlightInsts->empty();
+ if (can_commit_insts) {
+ QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());
+ MinorDynInstPtr inst = head_inflight_inst->inst;
+
+ can_commit_insts = can_commit_insts &&
+ (!inst->inLSQ || (lsq.findResponse(inst) != NULL));
+
+ if (!inst->inLSQ) {
+ bool can_transfer_mem_inst = false;
+ if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
+ const MinorDynInstPtr head_mem_ref_inst =
+ ex_info.inFUMemInsts->front().inst;
+ FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
+ const MinorDynInstPtr &fu_inst = fu->front().inst;
+ can_transfer_mem_inst =
+ !fu_inst->isBubble() &&
+ fu_inst->id.threadId == tid &&
+ !fu_inst->inLSQ &&
+ fu_inst->canEarlyIssue &&
+ inst->id.execSeqNum > fu_inst->instToWaitFor;
+ }
+
+ bool can_execute_fu_inst = inst->fuIndex == noCostFUIndex;
+ if (can_commit_insts && !can_transfer_mem_inst &&
+ inst->fuIndex != noCostFUIndex)
+ {
+ QueuedInst& fu_inst = funcUnits[inst->fuIndex]->front();
+ can_execute_fu_inst = !fu_inst.inst->isBubble() &&
+ fu_inst.inst->id == inst->id;
+ }
+
+ can_commit_insts = can_commit_insts &&
+ (can_transfer_mem_inst || can_execute_fu_inst);
+ }
+ }
+
+
+ if (can_commit_insts) {
+ commitPriority = tid;
+ return tid;
+ }
+ }
+
+ return InvalidThreadID;
+}
+
+inline ThreadID
+Execute::getIssuingThread()
+{
+ std::vector<ThreadID> priority_list;
+
+ switch (cpu.threadPolicy) {
+ case Enums::SingleThreaded:
+ return 0;
+ case Enums::RoundRobin:
+ priority_list = cpu.roundRobinPriority(issuePriority);
+ break;
+ case Enums::Random:
+ priority_list = cpu.randomPriority();
+ break;
+ default:
+ panic("Invalid thread scheduling policy.");
+ }
+
+ for (auto tid : priority_list) {
+ if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+ getInput(tid)) {
+ issuePriority = tid;
+ return tid;
+ }
+ }
+
+ return InvalidThreadID;
}
void
@@ -1638,11 +1756,10 @@ Execute::drainResume()
{
DPRINTF(Drain, "MinorExecute drainResume\n");
- setDrainState(NotDraining);
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ setDrainState(tid, NotDraining);
+ }
- /* Wakeup fetch and keep the pipeline running until that branch takes
- * effect */
- wakeupFetch(BranchData::WakeupFetch);
cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
@@ -1671,10 +1788,10 @@ std::ostream &operator <<(std::ostream &os, Execute::DrainState state)
}
void
-Execute::setDrainState(DrainState state)
+Execute::setDrainState(ThreadID thread_id, DrainState state)
{
- DPRINTF(Drain, "setDrainState: %s\n", state);
- drainState = state;
+ DPRINTF(Drain, "setDrainState[%d]: %s\n", thread_id, state);
+ executeInfo[thread_id].drainState = state;
}
unsigned int
@@ -1682,29 +1799,39 @@ Execute::drain()
{
DPRINTF(Drain, "MinorExecute drain\n");
- if (drainState == NotDraining) {
- cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ if (executeInfo[tid].drainState == NotDraining) {
+ cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
- /* Go to DrainCurrentInst if we're between microops
- * or waiting on an unbufferable memory operation.
- * Otherwise we can go straight to DrainHaltFetch
- */
- if (isInbetweenInsts())
- setDrainState(DrainHaltFetch);
- else
- setDrainState(DrainCurrentInst);
+ /* Go to DrainCurrentInst if we're between microops
+ * or waiting on an unbufferable memory operation.
+ * Otherwise we can go straight to DrainHaltFetch
+ */
+ if (isInbetweenInsts(tid))
+ setDrainState(tid, DrainHaltFetch);
+ else
+ setDrainState(tid, DrainCurrentInst);
+ }
}
-
return (isDrained() ? 0 : 1);
}
bool
Execute::isDrained()
{
- return drainState == DrainAllInsts &&
- inputBuffer.empty() &&
- inFlightInsts->empty() &&
- lsq.isDrained();
+ if (!lsq.isDrained())
+ return false;
+
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ if (executeInfo[tid].drainState != DrainAllInsts ||
+ !inputBuffer[tid].empty() ||
+ !executeInfo[tid].inFlightInsts->empty()) {
+
+ return false;
+ }
+ }
+
+ return true;
}
Execute::~Execute()
@@ -1712,13 +1839,14 @@ Execute::~Execute()
for (unsigned int i = 0; i < numFuncUnits; i++)
delete funcUnits[i];
- delete inFlightInsts;
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
+ delete executeInfo[tid].inFlightInsts;
}
bool
Execute::instIsRightStream(MinorDynInstPtr inst)
{
- return inst->id.streamSeqNum == streamSeqNum;
+ return inst->id.streamSeqNum == executeInfo[inst->id.threadId].streamSeqNum;
}
bool
@@ -1726,8 +1854,8 @@ Execute::instIsHeadInst(MinorDynInstPtr inst)
{
bool ret = false;
- if (!inFlightInsts->empty())
- ret = inFlightInsts->front().inst->id == inst->id;
+ if (!executeInfo[inst->id.threadId].inFlightInsts->empty())
+ ret = executeInfo[inst->id.threadId].inFlightInsts->front().inst->id == inst->id;
return ret;
}
diff --git a/src/cpu/minor/execute.hh b/src/cpu/minor/execute.hh
index 8cd026534..165a5bae4 100644
--- a/src/cpu/minor/execute.hh
+++ b/src/cpu/minor/execute.hh
@@ -116,13 +116,13 @@ class Execute : public Named
LSQ lsq;
/** Scoreboard of instruction dependencies */
- Scoreboard scoreboard;
+ std::vector<Scoreboard> scoreboard;
/** The execution functional units */
std::vector<FUPipeline *> funcUnits;
public: /* Public for Pipeline to be able to pass it to Decode */
- InputBuffer<ForwardInstData> inputBuffer;
+ std::vector<InputBuffer<ForwardInstData>> inputBuffer;
protected:
/** Stage cycle-by-cycle state */
@@ -143,48 +143,75 @@ class Execute : public Named
DrainAllInsts /* Discarding all remaining insts */
};
- /** In-order instructions either in FUs or the LSQ */
- Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts;
-
- /** Memory ref instructions still in the FUs */
- Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts;
-
- /** Index that we've completed upto in getInput data. We can say we're
- * popInput when this equals getInput()->width() */
- unsigned int inputIndex;
-
- /** The last commit was the end of a full instruction so an interrupt
- * can safely happen */
- bool lastCommitWasEndOfMacroop;
-
- /** Structure for reporting insts currently being processed/retired
- * for MinorTrace */
- ForwardInstData instsBeingCommitted;
-
- /** Source of sequence number for instuction streams. Increment this and
- * pass to fetch whenever an instruction stream needs to be changed.
- * For any more complicated behaviour (e.g. speculation) there'll need
- * to be another plan. THREAD, need one for each thread */
- InstSeqNum streamSeqNum;
+ struct ExecuteThreadInfo {
+ /** Constructor */
+ ExecuteThreadInfo(unsigned int insts_committed) :
+ inputIndex(0),
+ lastCommitWasEndOfMacroop(true),
+ instsBeingCommitted(insts_committed),
+ streamSeqNum(InstId::firstStreamSeqNum),
+ lastPredictionSeqNum(InstId::firstPredictionSeqNum),
+ drainState(NotDraining)
+ { }
+
+ ExecuteThreadInfo(const ExecuteThreadInfo& other) :
+ inputIndex(other.inputIndex),
+ lastCommitWasEndOfMacroop(other.lastCommitWasEndOfMacroop),
+ instsBeingCommitted(other.instsBeingCommitted),
+ streamSeqNum(other.streamSeqNum),
+ lastPredictionSeqNum(other.lastPredictionSeqNum),
+ drainState(other.drainState)
+ { }
+
+ /** In-order instructions either in FUs or the LSQ */
+ Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts;
+
+ /** Memory ref instructions still in the FUs */
+ Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts;
+
+ /** Index that we've completed upto in getInput data. We can say we're
+ * popInput when this equals getInput()->width() */
+ unsigned int inputIndex;
+
+ /** The last commit was the end of a full instruction so an interrupt
+ * can safely happen */
+ bool lastCommitWasEndOfMacroop;
+
+ /** Structure for reporting insts currently being processed/retired
+ * for MinorTrace */
+ ForwardInstData instsBeingCommitted;
+
+ /** Source of sequence number for instuction streams. Increment this and
+ * pass to fetch whenever an instruction stream needs to be changed.
+ * For any more complicated behaviour (e.g. speculation) there'll need
+ * to be another plan. */
+ InstSeqNum streamSeqNum;
+
+ /** A prediction number for use where one isn't available from an
+ * instruction. This is harvested from committed instructions.
+ * This isn't really needed as the streamSeqNum will change on
+ * a branch, but it minimises disruption in stream identification */
+ InstSeqNum lastPredictionSeqNum;
+
+ /** State progression for draining NotDraining -> ... -> DrainAllInsts */
+ DrainState drainState;
+ };
- /** A prediction number for use where one isn't available from an
- * instruction. This is harvested from committed instructions.
- * This isn't really needed as the streamSeqNum will change on
- * a branch, but it minimises disruption in stream identification */
- InstSeqNum lastPredictionSeqNum;
+ std::vector<ExecuteThreadInfo> executeInfo;
- /** State progression for draining NotDraining -> ... -> DrainAllInsts */
- DrainState drainState;
+ ThreadID interruptPriority;
+ ThreadID issuePriority;
+ ThreadID commitPriority;
protected:
friend std::ostream &operator <<(std::ostream &os, DrainState state);
/** Get a piece of data to work on from the inputBuffer, or 0 if there
* is no data. */
- const ForwardInstData *getInput();
+ const ForwardInstData *getInput(ThreadID tid);
/** Pop an element off the input buffer, if there are any */
- void popInput();
+ void popInput(ThreadID tid);
/** Generate Branch data based (into branch) on an observed (or not)
* change in PC while executing an instruction.
@@ -193,7 +220,7 @@ class Execute : public Named
/** Actually create a branch to communicate to Fetch1/Fetch2 and,
* if that is a stream-changing branch update the streamSeqNum */
- void updateBranchData(BranchData::Reason reason,
+ void updateBranchData(ThreadID tid, BranchData::Reason reason,
MinorDynInstPtr inst, const TheISA::PCState &target,
BranchData &branch);
@@ -224,23 +251,32 @@ class Execute : public Named
bool isInterrupted(ThreadID thread_id) const;
/** Are we between instructions? Can we be interrupted? */
- bool isInbetweenInsts() const;
+ bool isInbetweenInsts(ThreadID thread_id) const;
/** Act on an interrupt. Returns true if an interrupt was actually
* signalled and invoked */
bool takeInterrupt(ThreadID thread_id, BranchData &branch);
/** Try and issue instructions from the inputBuffer */
- unsigned int issue(bool only_issue_microops);
+ unsigned int issue(ThreadID thread_id);
/** Try to act on PC-related events. Returns true if any were
* executed */
- bool tryPCEvents();
+ bool tryPCEvents(ThreadID thread_id);
/** Do the stats handling and instruction count and PC event events
* related to the new instruction/op counts */
void doInstCommitAccounting(MinorDynInstPtr inst);
+ /** Check all threads for possible interrupts. If interrupt is taken,
+ * returns the tid of the thread. interrupted is set if any thread
+ * has an interrupt, irrespective of if it is taken */
+ ThreadID checkInterrupts(BranchData& branch, bool& interrupted);
+
+ /** Checks if a specific thread has an interrupt. No action is taken.
+ * this is used for determining if a thread should only commit microops */
+ bool hasInterrupt(ThreadID thread_id);
+
/** Commit a single instruction. Returns true if the instruction being
* examined was completed (fully executed, discarded, or initiated a
* memory access), false if there is still some processing to do.
@@ -266,10 +302,16 @@ class Execute : public Named
* If discard is true then discard all instructions rather than
* committing.
* branch is set to any branch raised during commit. */
- void commit(bool only_commit_microops, bool discard, BranchData &branch);
+ void commit(ThreadID thread_id, bool only_commit_microops, bool discard,
+ BranchData &branch);
/** Set the drain state (with useful debugging messages) */
- void setDrainState(DrainState state);
+ void setDrainState(ThreadID thread_id, DrainState state);
+
+ /** Use the current threading policy to determine the next thread to
+ * decode from. */
+ ThreadID getCommittingThread();
+ ThreadID getIssuingThread();
public:
Execute(const std::string &name_,
@@ -282,12 +324,6 @@ class Execute : public Named
public:
- /** Cause Execute to issue an UnpredictedBranch (or WakeupFetch if
- * that was passed as the reason) to Fetch1 to wake the
- * system up (using the PC from the thread context). */
- void wakeupFetch(BranchData::Reason reason =
- BranchData::UnpredictedBranch);
-
/** Returns the DcachePort owned by this Execute to pass upwards */
MinorCPU::MinorCPUPort &getDcachePort();
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index d19d7b042..f4f120534 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -57,7 +57,7 @@ Fetch1::Fetch1(const std::string &name_,
Latch<BranchData>::Output inp_,
Latch<ForwardLineData>::Input out_,
Latch<BranchData>::Output prediction_,
- Reservable &next_stage_input_buffer) :
+ std::vector<InputBuffer<ForwardLineData>> &next_stage_input_buffer) :
Named(name_),
cpu(cpu_),
inp(inp_),
@@ -68,11 +68,8 @@ Fetch1::Fetch1(const std::string &name_,
lineSnap(params.fetch1LineSnapWidth),
maxLineWidth(params.fetch1LineWidth),
fetchLimit(params.fetch1FetchLimit),
- state(FetchWaitingForPC),
- pc(0),
- streamSeqNum(InstId::firstStreamSeqNum),
- predictionSeqNum(InstId::firstPredictionSeqNum),
- blocked(false),
+ fetchInfo(params.numThreads),
+ threadPriority(0),
requests(name_ + ".requests", "lines", params.fetch1FetchLimit),
transfers(name_ + ".transfers", "lines", params.fetch1FetchLimit),
icacheState(IcacheRunning),
@@ -114,32 +111,67 @@ Fetch1::Fetch1(const std::string &name_,
}
}
+inline ThreadID
+Fetch1::getScheduledThread()
+{
+ /* Select thread via policy. */
+ std::vector<ThreadID> priority_list;
+
+ switch (cpu.threadPolicy) {
+ case Enums::SingleThreaded:
+ priority_list.push_back(0);
+ break;
+ case Enums::RoundRobin:
+ priority_list = cpu.roundRobinPriority(threadPriority);
+ break;
+ case Enums::Random:
+ priority_list = cpu.randomPriority();
+ break;
+ default:
+ panic("Unknown fetch policy");
+ }
+
+ for (auto tid : priority_list) {
+ if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+ !fetchInfo[tid].blocked &&
+ fetchInfo[tid].state == FetchRunning) {
+ threadPriority = tid;
+ return tid;
+ }
+ }
+
+ return InvalidThreadID;
+}
+
void
-Fetch1::fetchLine()
+Fetch1::fetchLine(ThreadID tid)
{
+ /* Reference the currently used thread state. */
+ Fetch1ThreadInfo &thread = fetchInfo[tid];
+
/* If line_offset != 0, a request is pushed for the remainder of the
* line. */
/* Use a lower, sizeof(MachInst) aligned address for the fetch */
- Addr aligned_pc = pc.instAddr() & ~((Addr) lineSnap - 1);
+ Addr aligned_pc = thread.pc.instAddr() & ~((Addr) lineSnap - 1);
unsigned int line_offset = aligned_pc % lineSnap;
unsigned int request_size = maxLineWidth - line_offset;
/* Fill in the line's id */
- InstId request_id(0 /* thread */,
- streamSeqNum, predictionSeqNum,
+ InstId request_id(tid,
+ thread.streamSeqNum, thread.predictionSeqNum,
lineSeqNum);
- FetchRequestPtr request = new FetchRequest(*this, request_id, pc);
+ FetchRequestPtr request = new FetchRequest(*this, request_id, thread.pc);
DPRINTF(Fetch, "Inserting fetch into the fetch queue "
"%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
- request_id, aligned_pc, pc, line_offset, request_size);
+ request_id, aligned_pc, thread.pc, line_offset, request_size);
- request->request.setContext(cpu.threads[0]->getTC()->contextId());
+ request->request.setContext(cpu.threads[tid]->getTC()->contextId());
request->request.setVirt(0 /* asid */,
aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
/* I've no idea why we need the PC, but give it */
- pc.instAddr());
+ thread.pc.instAddr());
DPRINTF(Fetch, "Submitting ITLB request\n");
numFetchesInITLB++;
@@ -165,12 +197,12 @@ Fetch1::fetchLine()
* reliable 'new' PC if the next line has a new stream sequence number. */
#if THE_ISA == ALPHA_ISA
/* Restore the low bits of the PC used as address space flags */
- Addr pc_low_bits = pc.instAddr() &
+ Addr pc_low_bits = thread.pc.instAddr() &
((Addr) (1 << sizeof(TheISA::MachInst)) - 1);
- pc.set(aligned_pc + request_size + pc_low_bits);
+ thread.pc.set(aligned_pc + request_size + pc_low_bits);
#else
- pc.set(aligned_pc + request_size);
+ thread.pc.set(aligned_pc + request_size);
#endif
}
@@ -454,46 +486,58 @@ operator <<(std::ostream &os, Fetch1::FetchState state)
void
Fetch1::changeStream(const BranchData &branch)
{
+ Fetch1ThreadInfo &thread = fetchInfo[branch.threadId];
+
updateExpectedSeqNums(branch);
/* Start fetching again if we were stopped */
switch (branch.reason) {
case BranchData::SuspendThread:
- DPRINTF(Fetch, "Suspending fetch: %s\n", branch);
- state = FetchWaitingForPC;
+ {
+ if (thread.wakeupGuard) {
+ DPRINTF(Fetch, "Not suspending fetch due to guard: %s\n",
+ branch);
+ } else {
+ DPRINTF(Fetch, "Suspending fetch: %s\n", branch);
+ thread.state = FetchWaitingForPC;
+ }
+ }
break;
case BranchData::HaltFetch:
DPRINTF(Fetch, "Halting fetch\n");
- state = FetchHalted;
+ thread.state = FetchHalted;
break;
default:
DPRINTF(Fetch, "Changing stream on branch: %s\n", branch);
- state = FetchRunning;
+ thread.state = FetchRunning;
break;
}
- pc = branch.target;
+ thread.pc = branch.target;
}
void
Fetch1::updateExpectedSeqNums(const BranchData &branch)
{
+ Fetch1ThreadInfo &thread = fetchInfo[branch.threadId];
+
DPRINTF(Fetch, "Updating streamSeqNum from: %d to %d,"
" predictionSeqNum from: %d to %d\n",
- streamSeqNum, branch.newStreamSeqNum,
- predictionSeqNum, branch.newPredictionSeqNum);
+ thread.streamSeqNum, branch.newStreamSeqNum,
+ thread.predictionSeqNum, branch.newPredictionSeqNum);
/* Change the stream */
- streamSeqNum = branch.newStreamSeqNum;
+ thread.streamSeqNum = branch.newStreamSeqNum;
/* Update the prediction. Note that it's possible for this to
* actually set the prediction to an *older* value if new
* predictions have been discarded by execute */
- predictionSeqNum = branch.newPredictionSeqNum;
+ thread.predictionSeqNum = branch.newPredictionSeqNum;
}
void
Fetch1::processResponse(Fetch1::FetchRequestPtr response,
ForwardLineData &line)
{
+ Fetch1ThreadInfo &thread = fetchInfo[response->id.threadId];
PacketPtr packet = response->packet;
/* Pass the prefetch abort (if any) on to Fetch2 in a ForwardLineData
@@ -514,7 +558,7 @@ Fetch1::processResponse(Fetch1::FetchRequestPtr response,
* can't (currently) selectively remove this stream from the queues */
DPRINTF(Fetch, "Stopping line fetch because of fault: %s\n",
response->fault->name());
- state = Fetch1::FetchWaitingForPC;
+ thread.state = Fetch1::FetchWaitingForPC;
} else {
line.adoptPacketData(packet);
/* Null the response's packet to prevent the response from trying to
@@ -532,61 +576,86 @@ Fetch1::evaluate()
assert(line_out.isBubble());
- blocked = !nextStageReserve.canReserve();
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
+ fetchInfo[tid].blocked = !nextStageReserve[tid].canReserve();
- /* Are we changing stream? Look to the Execute branches first, then
- * to predicted changes of stream from Fetch2 */
- /* @todo, find better way to express ignoring branch predictions */
- if (execute_branch.isStreamChange() &&
- execute_branch.reason != BranchData::BranchPrediction)
- {
- if (state == FetchHalted) {
- if (execute_branch.reason == BranchData::WakeupFetch) {
- DPRINTF(Fetch, "Waking up fetch: %s\n", execute_branch);
+ /** Are both branches from later stages valid and for the same thread? */
+ if (execute_branch.threadId != InvalidThreadID &&
+ execute_branch.threadId == fetch2_branch.threadId) {
+
+ Fetch1ThreadInfo &thread = fetchInfo[execute_branch.threadId];
+
+ /* Are we changing stream? Look to the Execute branches first, then
+ * to predicted changes of stream from Fetch2 */
+ if (execute_branch.isStreamChange()) {
+ if (thread.state == FetchHalted) {
+ DPRINTF(Fetch, "Halted, ignoring branch: %s\n", execute_branch);
+ } else {
changeStream(execute_branch);
+ }
+
+ if (!fetch2_branch.isBubble()) {
+ DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n",
+ fetch2_branch);
+ }
+
+ /* The streamSeqNum tagging in request/response ->req should handle
+ * discarding those requests when we get to them. */
+ } else if (thread.state != FetchHalted && fetch2_branch.isStreamChange()) {
+ /* Handle branch predictions by changing the instruction source
+ * if we're still processing the same stream (as set by streamSeqNum)
+ * as the one of the prediction.
+ */
+ if (fetch2_branch.newStreamSeqNum != thread.streamSeqNum) {
+ DPRINTF(Fetch, "Not changing stream on prediction: %s,"
+ " streamSeqNum mismatch\n",
+ fetch2_branch);
} else {
- DPRINTF(Fetch, "Halted, ignoring branch: %s\n",
- execute_branch);
+ changeStream(fetch2_branch);
}
- } else {
- changeStream(execute_branch);
}
+ } else {
+ /* Fetch2 and Execute branches are for different threads */
+ if (execute_branch.threadId != InvalidThreadID &&
+ execute_branch.isStreamChange()) {
- if (!fetch2_branch.isBubble()) {
- DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n",
- fetch2_branch);
+ if (fetchInfo[execute_branch.threadId].state == FetchHalted) {
+ DPRINTF(Fetch, "Halted, ignoring branch: %s\n", execute_branch);
+ } else {
+ changeStream(execute_branch);
+ }
}
- /* The streamSeqNum tagging in request/response ->req should handle
- * discarding those requests when we get to them. */
- } else if (state != FetchHalted && fetch2_branch.isStreamChange()) {
- /* Handle branch predictions by changing the instruction source
- * if we're still processing the same stream (as set by streamSeqNum)
- * as the one of the prediction.
- */
- if (fetch2_branch.newStreamSeqNum != streamSeqNum) {
- DPRINTF(Fetch, "Not changing stream on prediction: %s,"
- " streamSeqNum mismatch\n",
- fetch2_branch);
- } else {
- changeStream(fetch2_branch);
+ if (fetch2_branch.threadId != InvalidThreadID &&
+ fetch2_branch.isStreamChange()) {
+
+ if (fetchInfo[fetch2_branch.threadId].state == FetchHalted) {
+ DPRINTF(Fetch, "Halted, ignoring branch: %s\n", fetch2_branch);
+ } else if (fetch2_branch.newStreamSeqNum != fetchInfo[fetch2_branch.threadId].streamSeqNum) {
+ DPRINTF(Fetch, "Not changing stream on prediction: %s,"
+ " streamSeqNum mismatch\n", fetch2_branch);
+ } else {
+ changeStream(fetch2_branch);
+ }
}
}
- /* Can we fetch? */
- /* The bare minimum requirements for initiating a fetch */
- /* THREAD need to handle multiple threads */
- if (state == FetchRunning && /* We are actually fetching */
- !blocked && /* Space in the Fetch2 inputBuffer */
- /* The thread we're going to fetch for (thread 0), is active */
- cpu.getContext(0)->status() == ThreadContext::Active &&
- numInFlightFetches() < fetchLimit)
- {
- fetchLine();
- /* Take up a slot in the fetch queue */
- nextStageReserve.reserve();
+ if (numInFlightFetches() < fetchLimit) {
+ ThreadID fetch_tid = getScheduledThread();
+
+ if (fetch_tid != InvalidThreadID) {
+ DPRINTF(Fetch, "Fetching from thread %d\n", fetch_tid);
+
+ /* Generate fetch to selected thread */
+ fetchLine(fetch_tid);
+ /* Take up a slot in the fetch queue */
+ nextStageReserve[fetch_tid].reserve();
+ } else {
+ DPRINTF(Fetch, "No active threads available to fetch from\n");
+ }
}
+
/* Halting shouldn't prevent fetches in flight from being processed */
/* Step fetches through the icachePort queues and memory system */
stepQueues();
@@ -599,9 +668,9 @@ Fetch1::evaluate()
Fetch1::FetchRequestPtr response = transfers.front();
if (response->isDiscardable()) {
- nextStageReserve.freeReservation();
+ nextStageReserve[response->id.threadId].freeReservation();
- DPRINTF(Fetch, "Discarding translated fetch at it's for"
+ DPRINTF(Fetch, "Discarding translated fetch as it's for"
" an old stream\n");
/* Wake up next cycle just in case there was some other
@@ -626,19 +695,49 @@ Fetch1::evaluate()
* generate a line output (tested just above) or to initiate a memory
* fetch which will signal activity when it returns/needs stepping
* between queues */
+
+
+ /* This looks hackish. And it is, but there doesn't seem to be a better
+ * way to do this. The signal from commit to suspend fetch takes 1
+ * clock cycle to propagate to fetch. However, a legitimate wakeup
+ * may occur between cycles from the memory system. Thus wakeup guard
+ * prevents us from suspending in that case. */
+
+ for (auto& thread : fetchInfo) {
+ thread.wakeupGuard = false;
+ }
+}
+
+void
+Fetch1::wakeupFetch(ThreadID tid)
+{
+ ThreadContext *thread_ctx = cpu.getContext(tid);
+ Fetch1ThreadInfo &thread = fetchInfo[tid];
+ thread.pc = thread_ctx->pcState();
+ thread.state = FetchRunning;
+ thread.wakeupGuard = true;
+ DPRINTF(Fetch, "[tid:%d]: Changing stream wakeup %s\n",
+ tid, thread_ctx->pcState());
+
+ cpu.wakeupOnEvent(Pipeline::Fetch1StageId);
}
bool
Fetch1::isDrained()
{
- DPRINTF(Drain, "isDrained %s %s%s\n",
- state,
- (numInFlightFetches() == 0 ? "" : "inFlightFetches "),
- ((*out.inputWire).isBubble() ? "" : "outputtingLine"));
-
- return state == FetchHalted &&
- numInFlightFetches() == 0 &&
- (*out.inputWire).isBubble();
+ bool drained = numInFlightFetches() == 0 && (*out.inputWire).isBubble();
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ Fetch1ThreadInfo &thread = fetchInfo[tid];
+ DPRINTF(Drain, "isDrained[tid:%d]: %s %s%s\n",
+ tid,
+ thread.state == FetchHalted,
+ (numInFlightFetches() == 0 ? "" : "inFlightFetches "),
+ ((*out.inputWire).isBubble() ? "" : "outputtingLine"));
+
+ drained = drained && thread.state == FetchHalted;
+ }
+
+ return drained;
}
void
@@ -649,26 +748,32 @@ Fetch1::FetchRequest::reportData(std::ostream &os) const
bool Fetch1::FetchRequest::isDiscardable() const
{
+ Fetch1ThreadInfo &thread = fetch.fetchInfo[id.threadId];
+
/* Can't discard lines in TLB/memory */
return state != InTranslation && state != RequestIssuing &&
- (id.streamSeqNum != fetch.streamSeqNum ||
- id.predictionSeqNum != fetch.predictionSeqNum);
+ (id.streamSeqNum != thread.streamSeqNum ||
+ id.predictionSeqNum != thread.predictionSeqNum);
}
void
Fetch1::minorTrace() const
{
+ // TODO: Un-bork minorTrace for THREADS
+ // bork bork bork
+ const Fetch1ThreadInfo &thread = fetchInfo[0];
+
std::ostringstream data;
- if (blocked)
+ if (thread.blocked)
data << 'B';
else
(*out.inputWire).reportData(data);
MINORTRACE("state=%s icacheState=%s in_tlb_mem=%s/%s"
- " streamSeqNum=%d lines=%s\n", state, icacheState,
+ " streamSeqNum=%d lines=%s\n", thread.state, icacheState,
numFetchesInITLB, numFetchesInMemorySystem,
- streamSeqNum, data.str());
+ thread.streamSeqNum, data.str());
requests.minorTrace();
transfers.minorTrace();
}
diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh
index d4a35c468..cf6c9d254 100644
--- a/src/cpu/minor/fetch1.hh
+++ b/src/cpu/minor/fetch1.hh
@@ -197,7 +197,7 @@ class Fetch1 : public Named
Latch<BranchData>::Output prediction;
/** Interface to reserve space in the next stage */
- Reservable &nextStageReserve;
+ std::vector<InputBuffer<ForwardLineData>> &nextStageReserve;
/** IcachePort to pass to the CPU. Fetch1 is the only module that uses
* it. */
@@ -233,26 +233,53 @@ class Fetch1 : public Named
/** Stage cycle-by-cycle state */
- FetchState state;
+ struct Fetch1ThreadInfo {
- /** Fetch PC value. This is updated by branches from Execute, branch
- * prediction targets from Fetch2 and by incrementing it as we fetch
- * lines subsequent to those two sources. */
- TheISA::PCState pc;
+ /** Consturctor to initialize all fields. */
+ Fetch1ThreadInfo() :
+ state(FetchWaitingForPC),
+ pc(TheISA::PCState(0)),
+ streamSeqNum(InstId::firstStreamSeqNum),
+ predictionSeqNum(InstId::firstPredictionSeqNum),
+ blocked(false),
+ wakeupGuard(false)
+ { }
+
+ Fetch1ThreadInfo(const Fetch1ThreadInfo& other) :
+ state(other.state),
+ pc(other.pc),
+ streamSeqNum(other.streamSeqNum),
+ predictionSeqNum(other.predictionSeqNum),
+ blocked(other.blocked)
+ { }
+
+ FetchState state;
+
+ /** Fetch PC value. This is updated by branches from Execute, branch
+ * prediction targets from Fetch2 and by incrementing it as we fetch
+ * lines subsequent to those two sources. */
+ TheISA::PCState pc;
- /** Stream sequence number. This changes on request from Execute and is
- * used to tag instructions by the fetch stream to which they belong.
- * Execute originates new prediction sequence numbers. */
- InstSeqNum streamSeqNum;
+ /** Stream sequence number. This changes on request from Execute and is
+ * used to tag instructions by the fetch stream to which they belong.
+ * Execute originates new prediction sequence numbers. */
+ InstSeqNum streamSeqNum;
- /** Prediction sequence number. This changes when requests from Execute
- * or Fetch2 ask for a change of fetch address and is used to tag lines
- * by the prediction to which they belong. Fetch2 originates
- * prediction sequence numbers. */
- InstSeqNum predictionSeqNum;
+ /** Prediction sequence number. This changes when requests from Execute
+ * or Fetch2 ask for a change of fetch address and is used to tag lines
+ * by the prediction to which they belong. Fetch2 originates
+ * prediction sequence numbers. */
+ InstSeqNum predictionSeqNum;
- /** Blocked indication for report */
- bool blocked;
+ /** Blocked indication for report */
+ bool blocked;
+
+ /** Signal to guard against sleeping first cycle of wakeup */
+ bool wakeupGuard;
+ };
+
+ std::vector<Fetch1ThreadInfo> fetchInfo;
+ ThreadID threadPriority;
/** State of memory access for head instruction fetch */
enum IcacheState
@@ -307,10 +334,15 @@ class Fetch1 : public Named
friend std::ostream &operator <<(std::ostream &os,
IcacheState state);
+
+ /** Use the current threading policy to determine the next thread to
+ * fetch from. */
+ ThreadID getScheduledThread();
+
/** Insert a line fetch into the requests. This can be a partial
* line request where the given address has a non-0 offset into a
* line. */
- void fetchLine();
+ void fetchLine(ThreadID tid);
/** Try and issue a fetch for a translated request at the
* head of the requests queue. Also tries to move the request
@@ -354,7 +386,7 @@ class Fetch1 : public Named
Latch<BranchData>::Output inp_,
Latch<ForwardLineData>::Input out_,
Latch<BranchData>::Output prediction_,
- Reservable &next_stage_input_buffer);
+ std::vector<InputBuffer<ForwardLineData>> &next_stage_input_buffer);
public:
/** Returns the IcachePort owned by this Fetch1 */
@@ -363,6 +395,9 @@ class Fetch1 : public Named
/** Pass on input/buffer data to the output if you can */
void evaluate();
+ /** Initiate fetch1 fetching */
+ void wakeupFetch(ThreadID tid);
+
void minorTrace() const;
/** Is this stage drained? For Fetch1, draining is initiated by
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index cb45f16e3..ae02b1c22 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -58,7 +58,7 @@ Fetch2::Fetch2(const std::string &name,
Latch<BranchData>::Output branchInp_,
Latch<BranchData>::Input predictionOut_,
Latch<ForwardInstData>::Input out_,
- Reservable &next_stage_input_buffer) :
+ std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
Named(name),
cpu(cpu_),
inp(inp_),
@@ -69,15 +69,8 @@ Fetch2::Fetch2(const std::string &name,
outputWidth(params.decodeInputWidth),
processMoreThanOneInput(params.fetch2CycleInput),
branchPredictor(*params.branchPred),
- inputBuffer(name + ".inputBuffer", "lines", params.fetch2InputBufferSize),
- inputIndex(0),
- pc(TheISA::PCState(0)),
- havePC(false),
- lastStreamSeqNum(InstId::firstStreamSeqNum),
- fetchSeqNum(InstId::firstFetchSeqNum),
- expectedStreamSeqNum(InstId::firstStreamSeqNum),
- predictionSeqNum(InstId::firstPredictionSeqNum),
- blocked(false)
+ fetchInfo(params.numThreads),
+ threadPriority(0)
{
if (outputWidth < 1)
fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth);
@@ -86,38 +79,46 @@ Fetch2::Fetch2(const std::string &name,
fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name,
params.fetch2InputBufferSize);
}
+
+ /* Per-thread input buffers */
+ for (ThreadID tid = 0; tid < params.numThreads; tid++) {
+ inputBuffer.push_back(
+ InputBuffer<ForwardLineData>(
+ name + ".inputBuffer" + std::to_string(tid), "lines",
+ params.fetch2InputBufferSize));
+ }
}
const ForwardLineData *
-Fetch2::getInput()
+Fetch2::getInput(ThreadID tid)
{
/* Get a line from the inputBuffer to work with */
- if (!inputBuffer.empty()) {
- return &(inputBuffer.front());
+ if (!inputBuffer[tid].empty()) {
+ return &(inputBuffer[tid].front());
} else {
return NULL;
}
}
void
-Fetch2::popInput()
+Fetch2::popInput(ThreadID tid)
{
- if (!inputBuffer.empty()) {
- inputBuffer.front().freeLine();
- inputBuffer.pop();
+ if (!inputBuffer[tid].empty()) {
+ inputBuffer[tid].front().freeLine();
+ inputBuffer[tid].pop();
}
- inputIndex = 0;
+ fetchInfo[tid].inputIndex = 0;
}
void
-Fetch2::dumpAllInput()
+Fetch2::dumpAllInput(ThreadID tid)
{
DPRINTF(Fetch, "Dumping whole input buffer\n");
- while (!inputBuffer.empty())
- popInput();
+ while (!inputBuffer[tid].empty())
+ popInput(tid);
- inputIndex = 0;
+ fetchInfo[tid].inputIndex = 0;
}
void
@@ -139,9 +140,6 @@ Fetch2::updateBranchPrediction(const BranchData &branch)
case BranchData::SuspendThread:
/* Don't need to act on suspends */
break;
- case BranchData::WakeupFetch:
- /* Don't need to act on wakeups, no instruction tied to action. */
- break;
case BranchData::HaltFetch:
/* Don't need to act on fetch wakeup */
break;
@@ -180,6 +178,7 @@ Fetch2::updateBranchPrediction(const BranchData &branch)
void
Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
{
+ Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId];
TheISA::PCState inst_pc = inst->pc;
assert(!inst->predictedTaken);
@@ -209,35 +208,37 @@ Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
if (inst->predictedTaken) {
/* Update the predictionSeqNum and remember the streamSeqNum that it
* was associated with */
- expectedStreamSeqNum = inst->id.streamSeqNum;
+ thread.expectedStreamSeqNum = inst->id.streamSeqNum;
BranchData new_branch = BranchData(BranchData::BranchPrediction,
- inst->id.streamSeqNum, predictionSeqNum + 1,
+ inst->id.threadId,
+ inst->id.streamSeqNum, thread.predictionSeqNum + 1,
inst->predictedTarget, inst);
/* Mark with a new prediction number by the stream number of the
* instruction causing the prediction */
- predictionSeqNum++;
+ thread.predictionSeqNum++;
branch = new_branch;
DPRINTF(Branch, "Branch predicted taken inst: %s target: %s"
" new predictionSeqNum: %d\n",
- *inst, inst->predictedTarget, predictionSeqNum);
+ *inst, inst->predictedTarget, thread.predictionSeqNum);
}
}
void
Fetch2::evaluate()
{
- inputBuffer.setTail(*inp.outputWire);
+ /* Push input onto appropriate input buffer */
+ if (!inp.outputWire->isBubble())
+ inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire);
+
ForwardInstData &insts_out = *out.inputWire;
BranchData prediction;
BranchData &branch_inp = *branchInp.outputWire;
assert(insts_out.isBubble());
- blocked = false;
-
/* React to branches from Execute to update local branch prediction
* structures */
updateBranchPrediction(branch_inp);
@@ -247,39 +248,48 @@ Fetch2::evaluate()
if (branch_inp.isStreamChange()) {
DPRINTF(Fetch, "Dumping all input as a stream changing branch"
" has arrived\n");
- dumpAllInput();
- havePC = false;
+ dumpAllInput(branch_inp.threadId);
+ fetchInfo[branch_inp.threadId].havePC = false;
}
+ assert(insts_out.isBubble());
/* Even when blocked, clear out input lines with the wrong
* prediction sequence number */
- {
- const ForwardLineData *line_in = getInput();
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ Fetch2ThreadInfo &thread = fetchInfo[tid];
+
+ thread.blocked = !nextStageReserve[tid].canReserve();
+
+ const ForwardLineData *line_in = getInput(tid);
while (line_in &&
- expectedStreamSeqNum == line_in->id.streamSeqNum &&
- predictionSeqNum != line_in->id.predictionSeqNum)
+ thread.expectedStreamSeqNum == line_in->id.streamSeqNum &&
+ thread.predictionSeqNum != line_in->id.predictionSeqNum)
{
DPRINTF(Fetch, "Discarding line %s"
" due to predictionSeqNum mismatch (expected: %d)\n",
- line_in->id, predictionSeqNum);
+ line_in->id, thread.predictionSeqNum);
- popInput();
- havePC = false;
+ popInput(tid);
+ fetchInfo[tid].havePC = false;
if (processMoreThanOneInput) {
DPRINTF(Fetch, "Wrapping\n");
- line_in = getInput();
+ line_in = getInput(tid);
} else {
line_in = NULL;
}
}
}
- if (!nextStageReserve.canReserve()) {
- blocked = true;
- } else {
- const ForwardLineData *line_in = getInput();
+ ThreadID tid = getScheduledThread();
+ DPRINTF(Fetch, "Scheduled Thread: %d\n", tid);
+
+ assert(insts_out.isBubble());
+ if (tid != InvalidThreadID) {
+ Fetch2ThreadInfo &fetch_info = fetchInfo[tid];
+
+ const ForwardLineData *line_in = getInput(tid);
unsigned int output_index = 0;
@@ -288,7 +298,7 @@ Fetch2::evaluate()
* for faulting lines */
while (line_in &&
(line_in->isFault() ||
- inputIndex < line_in->lineWidth) && /* More input */
+ fetch_info.inputIndex < line_in->lineWidth) && /* More input */
output_index < outputWidth && /* More output to fill */
prediction.isBubble() /* No predicted branch */)
{
@@ -298,26 +308,26 @@ Fetch2::evaluate()
/* Discard line due to prediction sequence number being wrong but
* without the streamSeqNum number having changed */
bool discard_line =
- expectedStreamSeqNum == line_in->id.streamSeqNum &&
- predictionSeqNum != line_in->id.predictionSeqNum;
+ fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum &&
+ fetch_info.predictionSeqNum != line_in->id.predictionSeqNum;
/* Set the PC if the stream changes. Setting havePC to false in
* a previous cycle handles all other change of flow of control
* issues */
- bool set_pc = lastStreamSeqNum != line_in->id.streamSeqNum;
+ bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum;
- if (!discard_line && (!havePC || set_pc)) {
+ if (!discard_line && (!fetch_info.havePC || set_pc)) {
/* Set the inputIndex to be the MachInst-aligned offset
* from lineBaseAddr of the new PC value */
- inputIndex =
+ fetch_info.inputIndex =
(line_in->pc.instAddr() & BaseCPU::PCMask) -
line_in->lineBaseAddr;
DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x"
" lineBaseAddr: 0x%x lineWidth: 0x%x\n",
- line_in->pc, inputIndex, line_in->lineBaseAddr,
+ line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
line_in->lineWidth);
- pc = line_in->pc;
- havePC = true;
+ fetch_info.pc = line_in->pc;
+ fetch_info.havePC = true;
decoder->reset();
}
@@ -330,7 +340,8 @@ Fetch2::evaluate()
* stream */
DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)"
" due to predictionSeqNum mismatch (expected: %d)\n",
- line_in->id, inputIndex, predictionSeqNum);
+ line_in->id, fetch_info.inputIndex,
+ fetch_info.predictionSeqNum);
} else if (line_in->isFault()) {
/* Pack a fault as a MinorDynInst with ->fault set */
@@ -339,13 +350,13 @@ Fetch2::evaluate()
dyn_inst = new MinorDynInst(line_in->id);
/* Fetch and prediction sequence numbers originate here */
- dyn_inst->id.fetchSeqNum = fetchSeqNum;
- dyn_inst->id.predictionSeqNum = predictionSeqNum;
+ dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
+ dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
/* To complete the set, test that exec sequence number has
* not been set */
assert(dyn_inst->id.execSeqNum == 0);
- dyn_inst->pc = pc;
+ dyn_inst->pc = fetch_info.pc;
/* Pack a faulting instruction but allow other
* instructions to be generated. (Fetch2 makes no
@@ -361,13 +372,14 @@ Fetch2::evaluate()
* assign */
inst_word = TheISA::gtoh(
*(reinterpret_cast<TheISA::MachInst *>
- (line + inputIndex)));
+ (line + fetch_info.inputIndex)));
if (!decoder->instReady()) {
- decoder->moreBytes(pc,
- line_in->lineBaseAddr + inputIndex, inst_word);
- DPRINTF(Fetch, "Offering MachInst to decoder"
- " addr: 0x%x\n", line_in->lineBaseAddr + inputIndex);
+ decoder->moreBytes(fetch_info.pc,
+ line_in->lineBaseAddr + fetch_info.inputIndex,
+ inst_word);
+ DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
+ line_in->lineBaseAddr + fetch_info.inputIndex);
}
/* Maybe make the above a loop to accomodate ISAs with
@@ -379,8 +391,8 @@ Fetch2::evaluate()
dyn_inst = new MinorDynInst(line_in->id);
/* Fetch and prediction sequence numbers originate here */
- dyn_inst->id.fetchSeqNum = fetchSeqNum;
- dyn_inst->id.predictionSeqNum = predictionSeqNum;
+ dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
+ dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
/* To complete the set, test that exec sequence number
* has not been set */
assert(dyn_inst->id.execSeqNum == 0);
@@ -388,17 +400,19 @@ Fetch2::evaluate()
/* Note that the decoder can update the given PC.
* Remember not to assign it until *after* calling
* decode */
- StaticInstPtr decoded_inst = decoder->decode(pc);
+ StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc);
dyn_inst->staticInst = decoded_inst;
- dyn_inst->pc = pc;
+ dyn_inst->pc = fetch_info.pc;
+ DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst);
+
DPRINTF(Fetch, "Instruction extracted from line %s"
" lineWidth: %d output_index: %d inputIndex: %d"
" pc: %s inst: %s\n",
line_in->id,
- line_in->lineWidth, output_index, inputIndex,
- pc, *dyn_inst);
+ line_in->lineWidth, output_index, fetch_info.inputIndex,
+ fetch_info.pc, *dyn_inst);
#if THE_ISA == X86_ISA || THE_ISA == ARM_ISA
/* In SE mode, it's possible to branch to a microop when
@@ -415,12 +429,12 @@ Fetch2::evaluate()
* the case that, after a branch, the first un-advanced PC
* may be pointing to a microop other than 0. Once
* advanced, however, the microop number *must* be 0 */
- pc.upc(0);
- pc.nupc(1);
+ fetch_info.pc.upc(0);
+ fetch_info.pc.nupc(1);
#endif
/* Advance PC for the next instruction */
- TheISA::advancePC(pc, decoded_inst);
+ TheISA::advancePC(fetch_info.pc, decoded_inst);
/* Predict any branches and issue a branch if
* necessary */
@@ -432,22 +446,23 @@ Fetch2::evaluate()
/* Step on the pointer into the line if there's no
* complete instruction waiting */
if (decoder->needMoreBytes()) {
- inputIndex += sizeof(TheISA::MachInst);
+ fetch_info.inputIndex += sizeof(TheISA::MachInst);
DPRINTF(Fetch, "Updated inputIndex value PC: %s"
" inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
- line_in->pc, inputIndex, line_in->lineBaseAddr,
+ line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
line_in->lineWidth);
}
}
if (dyn_inst) {
/* Step to next sequence number */
- fetchSeqNum++;
+ fetch_info.fetchSeqNum++;
/* Correctly size the output before writing */
- if (output_index == 0)
+ if (output_index == 0) {
insts_out.resize(outputWidth);
+ }
/* Pack the generated dynamic instruction into the output */
insts_out.insts[output_index] = dyn_inst;
output_index++;
@@ -463,7 +478,7 @@ Fetch2::evaluate()
/* Remember the streamSeqNum of this line so we can tell when
* we change stream */
- lastStreamSeqNum = line_in->id.streamSeqNum;
+ fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum;
/* Asked to discard line or there was a branch or fault */
if (!prediction.isBubble() || /* The remains of a
@@ -471,33 +486,35 @@ Fetch2::evaluate()
line_in->isFault() /* A line which is just a fault */)
{
DPRINTF(Fetch, "Discarding all input on branch/fault\n");
- dumpAllInput();
- havePC = false;
+ dumpAllInput(tid);
+ fetch_info.havePC = false;
line_in = NULL;
} else if (discard_line) {
/* Just discard one line, one's behind it may have new
* stream sequence numbers. There's a DPRINTF above
* for this event */
- popInput();
- havePC = false;
+ popInput(tid);
+ fetch_info.havePC = false;
line_in = NULL;
- } else if (inputIndex == line_in->lineWidth) {
+ } else if (fetch_info.inputIndex == line_in->lineWidth) {
/* Got to end of a line, pop the line but keep PC
* in case this is a line-wrapping inst. */
- popInput();
+ popInput(tid);
line_in = NULL;
}
if (!line_in && processMoreThanOneInput) {
DPRINTF(Fetch, "Wrapping\n");
- line_in = getInput();
+ line_in = getInput(tid);
}
}
/* The rest of the output (if any) should already have been packed
* with bubble instructions by insts_out's initialisation */
}
-
+ if (tid == InvalidThreadID) {
+ assert(insts_out.isBubble());
+ }
/** Reserve a slot in the next stage and output data */
*predictionOut.inputWire = prediction;
@@ -506,24 +523,66 @@ Fetch2::evaluate()
if (!insts_out.isBubble()) {
/* Note activity of following buffer */
cpu.activityRecorder->activity();
- nextStageReserve.reserve();
+ insts_out.threadId = tid;
+ nextStageReserve[tid].reserve();
}
/* If we still have input to process and somewhere to put it,
* mark stage as active */
- if (getInput() && nextStageReserve.canReserve())
- cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
+ for (ThreadID i = 0; i < cpu.numThreads; i++)
+ {
+ if (getInput(i) && nextStageReserve[i].canReserve()) {
+ cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
+ break;
+ }
+ }
/* Make sure the input (if any left) is pushed */
- inputBuffer.pushTail();
+ if (!inp.outputWire->isBubble())
+ inputBuffer[inp.outputWire->id.threadId].pushTail();
+}
+
+inline ThreadID
+Fetch2::getScheduledThread()
+{
+ /* Select thread via policy. */
+ std::vector<ThreadID> priority_list;
+
+ switch (cpu.threadPolicy) {
+ case Enums::SingleThreaded:
+ priority_list.push_back(0);
+ break;
+ case Enums::RoundRobin:
+ priority_list = cpu.roundRobinPriority(threadPriority);
+ break;
+ case Enums::Random:
+ priority_list = cpu.randomPriority();
+ break;
+ default:
+ panic("Unknown fetch policy");
+ }
+
+ for (auto tid : priority_list) {
+ if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+ getInput(tid) && !fetchInfo[tid].blocked) {
+ threadPriority = tid;
+ return tid;
+ }
+ }
+
+ return InvalidThreadID;
}
bool
Fetch2::isDrained()
{
- return inputBuffer.empty() &&
- (*inp.outputWire).isBubble() &&
- (*predictionOut.inputWire).isBubble();
+ for (const auto &buffer : inputBuffer) {
+ if (!buffer.empty())
+ return false;
+ }
+
+ return (*inp.outputWire).isBubble() &&
+ (*predictionOut.inputWire).isBubble();
}
void
@@ -531,14 +590,14 @@ Fetch2::minorTrace() const
{
std::ostringstream data;
- if (blocked)
+ if (fetchInfo[0].blocked)
data << 'B';
else
(*out.inputWire).reportData(data);
MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n",
- inputIndex, havePC, predictionSeqNum, data.str());
- inputBuffer.minorTrace();
+ fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str());
+ inputBuffer[0].minorTrace();
}
}
diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh
index 2fc38b377..33c683b82 100644
--- a/src/cpu/minor/fetch2.hh
+++ b/src/cpu/minor/fetch2.hh
@@ -78,7 +78,7 @@ class Fetch2 : public Named
Latch<ForwardInstData>::Input out;
/** Interface to reserve space in the next stage */
- Reservable &nextStageReserve;
+ std::vector<InputBuffer<ForwardInstData>> &nextStageReserve;
/** Width of output of this stage/input of next in instructions */
unsigned int outputWidth;
@@ -92,61 +92,90 @@ class Fetch2 : public Named
public:
/* Public so that Pipeline can pass it to Fetch1 */
- InputBuffer<ForwardLineData> inputBuffer;
+ std::vector<InputBuffer<ForwardLineData>> inputBuffer;
protected:
/** Data members after this line are cycle-to-cycle state */
- /** Index into an incompletely processed input line that instructions
- * are to be extracted from */
- unsigned int inputIndex;
-
- /** Remembered program counter value. Between contiguous lines, this
- * is just updated with advancePC. For lines following changes of
- * stream, a new PC must be loaded and havePC be set.
- * havePC is needed to accomodate instructions which span across
- * lines meaning that Fetch2 and the decoder need to remember a PC
- * value and a partially-offered instruction from the previous line */
- TheISA::PCState pc;
-
- /** PC is currently valid. Initially false, gets set to true when a
- * change-of-stream line is received and false again when lines are
- * discarded for any reason */
- bool havePC;
-
- /** Stream sequence number of the last seen line used to identify changes
- * of instruction stream */
- InstSeqNum lastStreamSeqNum;
-
- /** Fetch2 is the source of fetch sequence numbers. These represent the
- * sequence that instructions were extracted from fetched lines. */
- InstSeqNum fetchSeqNum;
-
- /** Stream sequence number remembered from last time the predictionSeqNum
- * changed. Lines should only be discarded when their predictionSeqNums
- * disagree with Fetch2::predictionSeqNum *and* they are from the same
- * stream that bore that prediction number */
- InstSeqNum expectedStreamSeqNum;
-
- /** Fetch2 is the source of prediction sequence numbers. These represent
- * predicted changes of control flow sources from branch prediction in
- * Fetch2. */
- InstSeqNum predictionSeqNum;
-
- /** Blocked indication for report */
- bool blocked;
+ struct Fetch2ThreadInfo {
+
+ /** Default constructor */
+ Fetch2ThreadInfo() :
+ inputIndex(0),
+ pc(TheISA::PCState(0)),
+ havePC(false),
+ lastStreamSeqNum(InstId::firstStreamSeqNum),
+ fetchSeqNum(InstId::firstFetchSeqNum),
+ expectedStreamSeqNum(InstId::firstStreamSeqNum),
+ predictionSeqNum(InstId::firstPredictionSeqNum),
+ blocked(false)
+ { }
+
+ Fetch2ThreadInfo(const Fetch2ThreadInfo& other) :
+ inputIndex(other.inputIndex),
+ pc(other.pc),
+ havePC(other.havePC),
+ lastStreamSeqNum(other.lastStreamSeqNum),
+ expectedStreamSeqNum(other.expectedStreamSeqNum),
+ predictionSeqNum(other.predictionSeqNum),
+ blocked(other.blocked)
+ { }
+
+ /** Index into an incompletely processed input line that instructions
+ * are to be extracted from */
+ unsigned int inputIndex;
+
+
+ /** Remembered program counter value. Between contiguous lines, this
+ * is just updated with advancePC. For lines following changes of
+ * stream, a new PC must be loaded and havePC be set.
+ * havePC is needed to accomodate instructions which span across
+ * lines meaning that Fetch2 and the decoder need to remember a PC
+ * value and a partially-offered instruction from the previous line */
+ TheISA::PCState pc;
+
+ /** PC is currently valid. Initially false, gets set to true when a
+ * change-of-stream line is received and false again when lines are
+ * discarded for any reason */
+ bool havePC;
+
+ /** Stream sequence number of the last seen line used to identify
+ * changes of instruction stream */
+ InstSeqNum lastStreamSeqNum;
+
+ /** Fetch2 is the source of fetch sequence numbers. These represent the
+ * sequence that instructions were extracted from fetched lines. */
+ InstSeqNum fetchSeqNum;
+
+ /** Stream sequence number remembered from last time the
+ * predictionSeqNum changed. Lines should only be discarded when their
+ * predictionSeqNums disagree with Fetch2::predictionSeqNum *and* they
+ * are from the same stream that bore that prediction number */
+ InstSeqNum expectedStreamSeqNum;
+
+ /** Fetch2 is the source of prediction sequence numbers. These
+ * represent predicted changes of control flow sources from branch
+ * prediction in Fetch2. */
+ InstSeqNum predictionSeqNum;
+
+ /** Blocked indication for report */
+ bool blocked;
+ };
+
+ std::vector<Fetch2ThreadInfo> fetchInfo;
+ ThreadID threadPriority;
protected:
/** Get a piece of data to work on from the inputBuffer, or 0 if there
* is no data. */
- const ForwardLineData *getInput();
+ const ForwardLineData *getInput(ThreadID tid);
/** Pop an element off the input buffer, if there are any */
- void popInput();
+ void popInput(ThreadID tid);
/** Dump the whole contents of the input buffer. Useful after a
* prediction changes control flow */
- void dumpAllInput();
+ void dumpAllInput(ThreadID tid);
/** Update local branch prediction structures from feedback from
* Execute. */
@@ -157,6 +186,10 @@ class Fetch2 : public Named
* carries the prediction to Fetch1 */
void predictBranch(MinorDynInstPtr inst, BranchData &branch);
+ /** Use the current threading policy to determine the next thread to
+ * fetch from. */
+ ThreadID getScheduledThread();
+
public:
Fetch2(const std::string &name,
MinorCPU &cpu_,
@@ -165,7 +198,7 @@ class Fetch2 : public Named
Latch<BranchData>::Output branchInp_,
Latch<BranchData>::Input predictionOut_,
Latch<ForwardInstData>::Input out_,
- Reservable &next_stage_input_buffer);
+ std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer);
public:
/** Pass on input/buffer data to the output if you can */
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index b5c0bc974..5995a52c2 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -216,13 +216,14 @@ operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
- bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier;
+ bool is_last_barrier =
+ inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];
DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
(is_last_barrier ? "last" : "a"), *inst);
if (is_last_barrier)
- lastMemBarrier = 0;
+ lastMemBarrier[inst->id.threadId] = 0;
}
void
@@ -676,7 +677,8 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
while (ret == NoAddrRangeCoverage && i != slots.rend()) {
LSQRequestPtr slot = *i;
- if (slot->packet) {
+ if (slot->packet &&
+ slot->inst->id.threadId == request->inst->id.threadId) {
AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
if (coverage != NoAddrRangeCoverage) {
@@ -1042,8 +1044,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
request->issuedToMemory = true;
}
- if (tryToSend(request))
+ if (tryToSend(request)) {
moveFromRequestsToTransfers(request);
+ }
} else {
request->setState(LSQRequest::Complete);
moveFromRequestsToTransfers(request);
@@ -1145,6 +1148,9 @@ LSQ::tryToSend(LSQRequestPtr request)
}
}
+ if (ret)
+ threadSnoop(request);
+
return ret;
}
@@ -1293,7 +1299,7 @@ LSQ::LSQ(std::string name_, std::string dcache_port_name_,
cpu(cpu_),
execute(execute_),
dcachePort(dcache_port_name_, *this, cpu_),
- lastMemBarrier(0),
+ lastMemBarrier(cpu.numThreads, 0),
state(MemoryRunning),
inMemorySystemLimit(in_memory_system_limit),
lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
@@ -1526,7 +1532,7 @@ LSQ::minorTrace() const
MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
" lastMemBarrier=%d\n",
state, numAccessesInDTLB, numAccessesInMemorySystem,
- numStoresInTransfers, lastMemBarrier);
+ numStoresInTransfers, lastMemBarrier[0]);
requests.minorTrace();
transfers.minorTrace();
storeBuffer.minorTrace();
@@ -1565,12 +1571,12 @@ void
LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
{
assert(inst->isInst() && inst->staticInst->isMemBarrier());
- assert(inst->id.execSeqNum > lastMemBarrier);
+ assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
/* Remember the barrier. We only have a notion of one
* barrier so this may result in some mem refs being
* delayed if they are between barriers */
- lastMemBarrier = inst->id.execSeqNum;
+ lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
}
void
@@ -1616,10 +1622,40 @@ LSQ::recvTimingSnoopReq(PacketPtr pkt)
/* LLSC operations in Minor can't be speculative and are executed from
* the head of the requests queue. We shouldn't need to do more than
* this action on snoops. */
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu.wakeup(tid);
+ }
+ }
- /* THREAD */
if (pkt->isInvalidate() || pkt->isWrite()) {
- TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
+ cacheBlockMask);
+ }
+ }
+}
+
+void
+LSQ::threadSnoop(LSQRequestPtr request)
+{
+ /* LLSC operations in Minor can't be speculative and are executed from
+ * the head of the requests queue. We shouldn't need to do more than
+ * this action on snoops. */
+ ThreadID req_tid = request->inst->id.threadId;
+ PacketPtr pkt = request->packet;
+
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ if (tid != req_tid) {
+ if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu.wakeup(tid);
+ }
+
+ if (pkt->isInvalidate() || pkt->isWrite()) {
+ TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
+ cacheBlockMask);
+ }
+ }
}
}
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 09fb30d03..e0b72177c 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -537,7 +537,7 @@ class LSQ : public Named
/** Most recent execSeqNum of a memory barrier instruction or
* 0 if there are no in-flight barriers. Useful as a
* dependency for early-issued memory operations */
- InstSeqNum lastMemBarrier;
+ std::vector<InstSeqNum> lastMemBarrier;
public:
/** Retry state of last issued memory transfer */
@@ -640,6 +640,9 @@ class LSQ : public Named
/** Can a request be sent to the memory system */
bool canSendToMemorySystem();
+ /** Snoop other threads monitors on memory system accesses */
+ void threadSnoop(LSQRequestPtr request);
+
public:
LSQ(std::string name_, std::string dcache_port_name_,
MinorCPU &cpu_, Execute &execute_,
@@ -691,7 +694,8 @@ class LSQ : public Named
void issuedMemBarrierInst(MinorDynInstPtr inst);
/** Get the execSeqNum of the last issued memory barrier */
- InstSeqNum getLastMemBarrier() const { return lastMemBarrier; }
+ InstSeqNum getLastMemBarrier(ThreadID thread_id) const
+ { return lastMemBarrier[thread_id]; }
/** Is there nothing left in the LSQ */
bool isDrained();
diff --git a/src/cpu/minor/pipe_data.cc b/src/cpu/minor/pipe_data.cc
index 447f9c0e7..208c9c9f4 100644
--- a/src/cpu/minor/pipe_data.cc
+++ b/src/cpu/minor/pipe_data.cc
@@ -71,9 +71,6 @@ operator <<(std::ostream &os, BranchData::Reason reason)
case BranchData::SuspendThread:
os << "SuspendThread";
break;
- case BranchData::WakeupFetch:
- os << "WakeupFetch";
- break;
case BranchData::HaltFetch:
os << "HaltFetch";
break;
@@ -102,7 +99,6 @@ BranchData::isStreamChange(const BranchData::Reason reason)
case BadlyPredictedBranch:
case SuspendThread:
case Interrupt:
- case WakeupFetch:
case HaltFetch:
ret = true;
break;
@@ -123,7 +119,6 @@ BranchData::isBranch(const BranchData::Reason reason)
case CorrectlyPredictedBranch:
case SuspendThread:
case Interrupt:
- case WakeupFetch:
case HaltFetch:
ret = false;
break;
@@ -228,8 +223,8 @@ ForwardLineData::reportData(std::ostream &os) const
os << id;
}
-ForwardInstData::ForwardInstData(unsigned int width) :
- numInsts(width)
+ForwardInstData::ForwardInstData(unsigned int width, ThreadID tid) :
+ numInsts(width), threadId(tid)
{
bubbleFill();
}
diff --git a/src/cpu/minor/pipe_data.hh b/src/cpu/minor/pipe_data.hh
index 4468cb89e..e514be2f9 100644
--- a/src/cpu/minor/pipe_data.hh
+++ b/src/cpu/minor/pipe_data.hh
@@ -91,8 +91,6 @@ class BranchData /* : public ReportIF, public BubbleIF */
* count it as stream changing itself and expect pc to be the PC
* of the next instruction */
SuspendThread,
- /* Wakeup fetching from Halted */
- WakeupFetch,
/* Branch from an interrupt (no instruction) */
Interrupt,
/* Stop fetching in anticipation of of draining */
@@ -112,6 +110,9 @@ class BranchData /* : public ReportIF, public BubbleIF */
/** Explanation for this branch */
Reason reason;
+ /** ThreadID associated with branch */
+ ThreadID threadId;
+
/** Sequence number of new stream/prediction to be adopted */
InstSeqNum newStreamSeqNum;
InstSeqNum newPredictionSeqNum;
@@ -124,18 +125,20 @@ class BranchData /* : public ReportIF, public BubbleIF */
public:
BranchData() :
- reason(NoBranch), newStreamSeqNum(0),
+ reason(NoBranch), threadId(InvalidThreadID), newStreamSeqNum(0),
newPredictionSeqNum(0), target(TheISA::PCState(0)),
inst(MinorDynInst::bubble())
{ }
BranchData(
Reason reason_,
+ ThreadID thread_id,
InstSeqNum new_stream_seq_num,
InstSeqNum new_prediction_seq_num,
TheISA::PCState target,
MinorDynInstPtr inst_) :
reason(reason_),
+ threadId(thread_id),
newStreamSeqNum(new_stream_seq_num),
newPredictionSeqNum(new_prediction_seq_num),
target(target),
@@ -258,8 +261,12 @@ class ForwardInstData /* : public ReportIF, public BubbleIF */
/** The number of insts slots that can be expected to be valid insts */
unsigned int numInsts;
+ /** Thread associated with these instructions */
+ ThreadID threadId;
+
public:
- explicit ForwardInstData(unsigned int width = 0);
+ explicit ForwardInstData(unsigned int width = 0,
+ ThreadID tid = InvalidThreadID);
ForwardInstData(const ForwardInstData &src);
diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc
index 39b7f31f9..8c04e3949 100644
--- a/src/cpu/minor/pipeline.cc
+++ b/src/cpu/minor/pipeline.cc
@@ -187,9 +187,9 @@ Pipeline::getDataPort()
}
void
-Pipeline::wakeupFetch()
+Pipeline::wakeupFetch(ThreadID tid)
{
- execute.wakeupFetch();
+ fetch1.wakeupFetch(tid);
}
bool
@@ -212,6 +212,11 @@ void
Pipeline::drainResume()
{
DPRINTF(Drain, "Drain resume\n");
+
+ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+ fetch1.wakeupFetch(tid);
+ }
+
execute.drainResume();
}
diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh
index 2e1aa9921..9b6ca0d32 100644
--- a/src/cpu/minor/pipeline.hh
+++ b/src/cpu/minor/pipeline.hh
@@ -112,7 +112,7 @@ class Pipeline : public Ticked
public:
/** Wake up the Fetch unit. This is needed on thread activation esp.
* after quiesce wakeup */
- void wakeupFetch();
+ void wakeupFetch(ThreadID tid);
/** Try to drain the CPU */
bool drain();
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index 44fe2fcae..8f20c5ff9 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -261,7 +261,7 @@ quiesceSkip(ThreadContext *tc)
EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
- Tick resume = curTick() + 1;
+ Tick resume = cpu->nextCycle() + 1;
cpu->reschedule(quiesceEvent, resume, true);
diff --git a/util/minorview/minor.pic b/util/minorview/minor.pic
index 52731d66f..fd32a3ef3 100644
--- a/util/minorview/minor.pic
+++ b/util/minorview/minor.pic
@@ -115,9 +115,9 @@ macro predictionFrame: decoder=frame stripDir=vert dataElement=predictionSeqNum
# name ::= ? alphanumeric name with dots ?
# value ::= "(<char-except-">)*", <char-except-' '>* }
-Fi: fetch2.inputBuffer inputBuffer decoder=lines
-Di: decode.inputBuffer inputBuffer decoder=insts hideId=E
-Ei: execute.inputBuffer inputBuffer stripDir=horiz decoder=insts border=mid
+Fi: fetch2.inputBuffer0 inputBuffer decoder=lines
+Di: decode.inputBuffer0 inputBuffer decoder=insts hideId=E
+Ei: execute.inputBuffer0 inputBuffer stripDir=horiz decoder=insts border=mid
F1: fetch1 streamFrame blankStrips=11 name="Fetch1"
fe: fetch1 decoder=lines border=thin name="Line"
F2: fetch2 predictionFrame blankStrips=11 name="Fetch2"
@@ -146,9 +146,9 @@ f3: execute.fu.3 fu shorten=2 name=Div
f4: execute.fu.4 fu shorten=2 name=Float
f5: execute.fu.5 fu shorten=2 name=Mem
f6: execute.fu.6 fu shorten=2 name=Misc
-iq: execute.inFlightInsts fifo decoder=insts name="inFlightInsts"
-im: execute.inFUMemInsts fifo decoder=insts name="inFU..."
-sc: execute.scoreboard name="scoreboard" decoder=indexedCounts \
+iq: execute.inFlightInsts0 fifo decoder=insts name="inFlightInsts"
+im: execute.inFUMemInsts0 fifo decoder=insts name="inFU..."
+sc: execute.scoreboard0 name="scoreboard" decoder=indexedCounts \
dataElement=busy border=mid name="scoreboard" strips=38 stripelems=3
sa: activity dataElement=stages activity name="Stage activity"
ac: activity dataElement=activity decoder=counts border=mid name="Activity"