diff options
author | Mitch Hayenga <mitch.hayenga@arm.com> | 2016-07-21 17:19:16 +0100 |
---|---|---|
committer | Mitch Hayenga <mitch.hayenga@arm.com> | 2016-07-21 17:19:16 +0100 |
commit | ff4009ac005be0347015f8ba5a8e37a3aa930e69 (patch) | |
tree | b80cfa7c70c0e39f54c8c3d78527722cb6658510 | |
parent | 8a476d387c84f037d0ccf3cc20dc88870ab45fec (diff) | |
download | gem5-ff4009ac005be0347015f8ba5a8e37a3aa930e69.tar.xz |
cpu: Add SMT support to MinorCPU
This patch adds SMT support to the MinorCPU. Currently
RoundRobin or Random thread scheduling are supported.
Change-Id: I91faf39ff881af5918cca05051829fc6261f20e3
-rw-r--r-- | src/cpu/minor/MinorCPU.py | 4 | ||||
-rw-r--r-- | src/cpu/minor/cpu.cc | 59 | ||||
-rw-r--r-- | src/cpu/minor/cpu.hh | 23 | ||||
-rw-r--r-- | src/cpu/minor/decode.cc | 161 | ||||
-rw-r--r-- | src/cpu/minor/decode.hh | 63 | ||||
-rw-r--r-- | src/cpu/minor/dyn_inst.cc | 6 | ||||
-rw-r--r-- | src/cpu/minor/exec_context.hh | 13 | ||||
-rw-r--r-- | src/cpu/minor/execute.cc | 668 | ||||
-rw-r--r-- | src/cpu/minor/execute.hh | 128 | ||||
-rw-r--r-- | src/cpu/minor/fetch1.cc | 273 | ||||
-rw-r--r-- | src/cpu/minor/fetch1.hh | 73 | ||||
-rw-r--r-- | src/cpu/minor/fetch2.cc | 251 | ||||
-rw-r--r-- | src/cpu/minor/fetch2.hh | 121 | ||||
-rw-r--r-- | src/cpu/minor/lsq.cc | 56 | ||||
-rw-r--r-- | src/cpu/minor/lsq.hh | 8 | ||||
-rw-r--r-- | src/cpu/minor/pipe_data.cc | 9 | ||||
-rw-r--r-- | src/cpu/minor/pipe_data.hh | 15 | ||||
-rw-r--r-- | src/cpu/minor/pipeline.cc | 9 | ||||
-rw-r--r-- | src/cpu/minor/pipeline.hh | 2 | ||||
-rw-r--r-- | src/sim/pseudo_inst.cc | 2 | ||||
-rw-r--r-- | util/minorview/minor.pic | 12 |
21 files changed, 1255 insertions, 701 deletions
diff --git a/src/cpu/minor/MinorCPU.py b/src/cpu/minor/MinorCPU.py index 9ab7b0b39..2c80af175 100644 --- a/src/cpu/minor/MinorCPU.py +++ b/src/cpu/minor/MinorCPU.py @@ -169,6 +169,8 @@ class MinorDefaultFUPool(MinorFUPool): MinorDefaultFloatSimdFU(), MinorDefaultMemFU(), MinorDefaultMiscFU()] +class ThreadPolicy(Enum): vals = ['SingleThreaded', 'RoundRobin', 'Random'] + class MinorCPU(BaseCPU): type = 'MinorCPU' cxx_header = "cpu/minor/cpu.hh" @@ -185,6 +187,8 @@ class MinorCPU(BaseCPU): def support_take_over(cls): return True + threadPolicy = Param.ThreadPolicy('RoundRobin', + "Thread scheduling policy") fetch1FetchLimit = Param.Unsigned(1, "Number of line fetches allowable in flight at once") fetch1LineSnapWidth = Param.Unsigned(0, diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc index 79807a2a7..016a60f47 100644 --- a/src/cpu/minor/cpu.cc +++ b/src/cpu/minor/cpu.cc @@ -47,32 +47,33 @@ #include "debug/Quiesce.hh" MinorCPU::MinorCPU(MinorCPUParams *params) : - BaseCPU(params) + BaseCPU(params), + threadPolicy(params->threadPolicy) { /* This is only written for one thread at the moment */ Minor::MinorThread *thread; - if (FullSystem) { - thread = new Minor::MinorThread(this, 0, params->system, params->itb, - params->dtb, params->isa[0]); - } else { - /* thread_id 0 */ - thread = new Minor::MinorThread(this, 0, params->system, - params->workload[0], params->itb, params->dtb, params->isa[0]); - } - - threads.push_back(thread); + for (ThreadID i = 0; i < numThreads; i++) { + if (FullSystem) { + thread = new Minor::MinorThread(this, i, params->system, + params->itb, params->dtb, params->isa[i]); + thread->setStatus(ThreadContext::Halted); + } else { + thread = new Minor::MinorThread(this, i, params->system, + params->workload[i], params->itb, params->dtb, + params->isa[i]); + } - thread->setStatus(ThreadContext::Halted); + threads.push_back(thread); + ThreadContext *tc = thread->getTC(); + threadContexts.push_back(tc); + } - ThreadContext *tc = thread->getTC(); if (params->checker) { fatal("The Minor model doesn't support checking (yet)\n"); } - threadContexts.push_back(tc); - Minor::MinorDynInst::init(); pipeline = new Minor::Pipeline(*this, *params); @@ -137,9 +138,6 @@ MinorCPU::serializeThread(CheckpointOut &cp, ThreadID thread_id) const void MinorCPU::unserializeThread(CheckpointIn &cp, ThreadID thread_id) { - if (thread_id != 0) - fatal("Trying to load more than one thread into a MinorCPU\n"); - threads[thread_id]->unserialize(cp); } @@ -170,11 +168,11 @@ void MinorCPU::wakeup(ThreadID tid) { DPRINTF(Drain, "[tid:%d] MinorCPU wakeup\n", tid); + assert(tid < numThreads); - if (threads[tid]->status() == ThreadContext::Suspended) + if (threads[tid]->status() == ThreadContext::Suspended) { threads[tid]->activate(); - - DPRINTF(Drain,"Suspended Processor awoke\n"); + } } void @@ -187,13 +185,10 @@ MinorCPU::startup() for (auto i = threads.begin(); i != threads.end(); i ++) (*i)->startup(); - /* Workaround cases in SE mode where a thread is activated with an - * incorrect PC that is updated after the call to activate. This - * causes problems for Minor since it instantiates a virtual - * branch instruction when activateContext() is called which ends - * up pointing to an illegal address. */ - if (threads[0]->status() == ThreadContext::Active) - activateContext(0); + for (ThreadID tid = 0; tid < numThreads; tid++) { + threads[tid]->startup(); + pipeline->wakeupFetch(tid); + } } DrainState @@ -246,6 +241,7 @@ MinorCPU::drainResume() for (ThreadID tid = 0; tid < numThreads; tid++) wakeup(tid); + pipeline->drainResume(); } @@ -278,7 +274,7 @@ MinorCPU::takeOverFrom(BaseCPU *old_cpu) void MinorCPU::activateContext(ThreadID thread_id) { - DPRINTF(MinorCPU, "ActivateContext thread: %d", thread_id); + DPRINTF(MinorCPU, "ActivateContext thread: %d\n", thread_id); /* Do some cycle accounting. lastStopped is reset to stop the * wakeup call on the pipeline from adding the quiesce period @@ -289,7 +285,7 @@ MinorCPU::activateContext(ThreadID thread_id) /* Wake up the thread, wakeup the pipeline tick */ threads[thread_id]->activate(); wakeupOnEvent(Minor::Pipeline::CPUStageId); - pipeline->wakeupFetch(); + pipeline->wakeupFetch(thread_id); BaseCPU::activateContext(thread_id); } @@ -317,9 +313,6 @@ MinorCPU::wakeupOnEvent(unsigned int stage_id) MinorCPU * MinorCPUParams::create() { - numThreads = 1; - if (!FullSystem && workload.size() != 1) - panic("only one workload allowed"); return new MinorCPU(this); } diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh index dad015e89..4e4762390 100644 --- a/src/cpu/minor/cpu.hh +++ b/src/cpu/minor/cpu.hh @@ -50,6 +50,7 @@ #include "cpu/minor/stats.hh" #include "cpu/base.hh" #include "cpu/simple_thread.hh" +#include "enums/ThreadPolicy.hh" #include "params/MinorCPU.hh" namespace Minor @@ -109,6 +110,8 @@ class MinorCPU : public BaseCPU }; + /** Thread Scheduling Policy (RoundRobin, Random, etc) */ + Enums::ThreadPolicy threadPolicy; protected: /** Return a reference to the data port. */ MasterPort &getDataPort() override; @@ -162,6 +165,26 @@ class MinorCPU : public BaseCPU void activateContext(ThreadID thread_id) override; void suspendContext(ThreadID thread_id) override; + /** Thread scheduling utility functions */ + std::vector<ThreadID> roundRobinPriority(ThreadID priority) + { + std::vector<ThreadID> prio_list; + for (ThreadID i = 1; i <= numThreads; i++) { + prio_list.push_back((priority + i) % numThreads); + } + return prio_list; + } + + std::vector<ThreadID> randomPriority() + { + std::vector<ThreadID> prio_list; + for (ThreadID i = 0; i < numThreads; i++) { + prio_list.push_back(i); + } + std::random_shuffle(prio_list.begin(), prio_list.end()); + return prio_list; + } + /** Interface for stages to signal that they have become active after * a callback or eventq event where the pipeline itself may have * already been idled. The stage argument should be from the diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc index 94eee2be3..6243bca01 100644 --- a/src/cpu/minor/decode.cc +++ b/src/cpu/minor/decode.cc @@ -49,7 +49,7 @@ Decode::Decode(const std::string &name, MinorCPUParams ¶ms, Latch<ForwardInstData>::Output inp_, Latch<ForwardInstData>::Input out_, - Reservable &next_stage_input_buffer) : + std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) : Named(name), cpu(cpu_), inp(inp_), @@ -57,11 +57,8 @@ Decode::Decode(const std::string &name, nextStageReserve(next_stage_input_buffer), outputWidth(params.executeInputWidth), processMoreThanOneInput(params.decodeCycleInput), - inputBuffer(name + ".inputBuffer", "insts", params.decodeInputBufferSize), - inputIndex(0), - inMacroop(false), - execSeqNum(InstId::firstExecSeqNum), - blocked(false) + decodeInfo(params.numThreads), + threadPriority(0) { if (outputWidth < 1) fatal("%s: executeInputWidth must be >= 1 (%d)\n", name, outputWidth); @@ -70,29 +67,37 @@ Decode::Decode(const std::string &name, fatal("%s: decodeInputBufferSize must be >= 1 (%d)\n", name, params.decodeInputBufferSize); } + + /* Per-thread input buffers */ + for (ThreadID tid = 0; tid < params.numThreads; tid++) { + inputBuffer.push_back( + InputBuffer<ForwardInstData>( + name + ".inputBuffer" + std::to_string(tid), "insts", + params.decodeInputBufferSize)); + } } const ForwardInstData * -Decode::getInput() +Decode::getInput(ThreadID tid) { /* Get insts from the inputBuffer to work with */ - if (!inputBuffer.empty()) { - const ForwardInstData &head = inputBuffer.front(); + if (!inputBuffer[tid].empty()) { + const ForwardInstData &head = inputBuffer[tid].front(); - return (head.isBubble() ? NULL : &(inputBuffer.front())); + return (head.isBubble() ? NULL : &(inputBuffer[tid].front())); } else { return NULL; } } void -Decode::popInput() +Decode::popInput(ThreadID tid) { - if (!inputBuffer.empty()) - inputBuffer.pop(); + if (!inputBuffer[tid].empty()) + inputBuffer[tid].pop(); - inputIndex = 0; - inMacroop = false; + decodeInfo[tid].inputIndex = 0; + decodeInfo[tid].inMacroop = false; } #if TRACING_ON @@ -117,32 +122,37 @@ dynInstAddTracing(MinorDynInstPtr inst, StaticInstPtr static_inst, void Decode::evaluate() { - inputBuffer.setTail(*inp.outputWire); + /* Push input onto appropriate input buffer */ + if (!inp.outputWire->isBubble()) + inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire); + ForwardInstData &insts_out = *out.inputWire; assert(insts_out.isBubble()); - blocked = false; + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) + decodeInfo[tid].blocked = !nextStageReserve[tid].canReserve(); - if (!nextStageReserve.canReserve()) { - blocked = true; - } else { - const ForwardInstData *insts_in = getInput(); + ThreadID tid = getScheduledThread(); + + if (tid != InvalidThreadID) { + DecodeThreadInfo &decode_info = decodeInfo[tid]; + const ForwardInstData *insts_in = getInput(tid); unsigned int output_index = 0; /* Pack instructions into the output while we can. This may involve * using more than one input line */ while (insts_in && - inputIndex < insts_in->width() && /* Still more input */ + decode_info.inputIndex < insts_in->width() && /* Still more input */ output_index < outputWidth /* Still more output to fill */) { - MinorDynInstPtr inst = insts_in->insts[inputIndex]; + MinorDynInstPtr inst = insts_in->insts[decode_info.inputIndex]; if (inst->isBubble()) { /* Skip */ - inputIndex++; - inMacroop = false; + decode_info.inputIndex++; + decode_info.inMacroop = false; } else { StaticInstPtr static_inst = inst->staticInst; /* Static inst of a macro-op above the output_inst */ @@ -153,25 +163,26 @@ Decode::evaluate() DPRINTF(Decode, "Fault being passed: %d\n", inst->fault->name()); - inputIndex++; - inMacroop = false; + decode_info.inputIndex++; + decode_info.inMacroop = false; } else if (static_inst->isMacroop()) { /* Generate a new micro-op */ StaticInstPtr static_micro_inst; /* Set up PC for the next micro-op emitted */ - if (!inMacroop) { - microopPC = inst->pc; - inMacroop = true; + if (!decode_info.inMacroop) { + decode_info.microopPC = inst->pc; + decode_info.inMacroop = true; } /* Get the micro-op static instruction from the * static_inst. */ static_micro_inst = - static_inst->fetchMicroop(microopPC.microPC()); + static_inst->fetchMicroop( + decode_info.microopPC.microPC()); output_inst = new MinorDynInst(inst->id); - output_inst->pc = microopPC; + output_inst->pc = decode_info.microopPC; output_inst->staticInst = static_micro_inst; output_inst->fault = NoFault; @@ -185,45 +196,46 @@ Decode::evaluate() DPRINTF(Decode, "Microop decomposition inputIndex:" " %d output_index: %d lastMicroop: %s microopPC:" " %d.%d inst: %d\n", - inputIndex, output_index, + decode_info.inputIndex, output_index, (static_micro_inst->isLastMicroop() ? "true" : "false"), - microopPC.instAddr(), microopPC.microPC(), + decode_info.microopPC.instAddr(), + decode_info.microopPC.microPC(), *output_inst); /* Acknowledge that the static_inst isn't mine, it's my * parent macro-op's */ parent_static_inst = static_inst; - static_micro_inst->advancePC(microopPC); + static_micro_inst->advancePC(decode_info.microopPC); /* Step input if this is the last micro-op */ if (static_micro_inst->isLastMicroop()) { - inputIndex++; - inMacroop = false; + decode_info.inputIndex++; + decode_info.inMacroop = false; } } else { /* Doesn't need decomposing, pass on instruction */ DPRINTF(Decode, "Passing on inst: %s inputIndex:" " %d output_index: %d\n", - *output_inst, inputIndex, output_index); + *output_inst, decode_info.inputIndex, output_index); parent_static_inst = static_inst; /* Step input */ - inputIndex++; - inMacroop = false; + decode_info.inputIndex++; + decode_info.inMacroop = false; } /* Set execSeqNum of output_inst */ - output_inst->id.execSeqNum = execSeqNum; + output_inst->id.execSeqNum = decode_info.execSeqNum; /* Add tracing */ #if TRACING_ON dynInstAddTracing(output_inst, parent_static_inst, cpu); #endif /* Step to next sequence number */ - execSeqNum++; + decode_info.execSeqNum++; /* Correctly size the output before writing */ if (output_index == 0) insts_out.resize(outputWidth); @@ -233,17 +245,17 @@ Decode::evaluate() } /* Have we finished with the input? */ - if (inputIndex == insts_in->width()) { + if (decode_info.inputIndex == insts_in->width()) { /* If we have just been producing micro-ops, we *must* have * got to the end of that for inputIndex to be pushed past * insts_in->width() */ - assert(!inMacroop); - popInput(); + assert(!decode_info.inMacroop); + popInput(tid); insts_in = NULL; if (processMoreThanOneInput) { DPRINTF(Decode, "Wrapping\n"); - insts_in = getInput(); + insts_in = getInput(tid); } } } @@ -261,22 +273,65 @@ Decode::evaluate() if (!insts_out.isBubble()) { /* Note activity of following buffer */ cpu.activityRecorder->activity(); - nextStageReserve.reserve(); + insts_out.threadId = tid; + nextStageReserve[tid].reserve(); } /* If we still have input to process and somewhere to put it, * mark stage as active */ - if (getInput() && nextStageReserve.canReserve()) - cpu.activityRecorder->activateStage(Pipeline::DecodeStageId); + for (ThreadID i = 0; i < cpu.numThreads; i++) + { + if (getInput(i) && nextStageReserve[i].canReserve()) { + cpu.activityRecorder->activateStage(Pipeline::DecodeStageId); + break; + } + } /* Make sure the input (if any left) is pushed */ - inputBuffer.pushTail(); + if (!inp.outputWire->isBubble()) + inputBuffer[inp.outputWire->threadId].pushTail(); +} + +inline ThreadID +Decode::getScheduledThread() +{ + /* Select thread via policy. */ + std::vector<ThreadID> priority_list; + + switch (cpu.threadPolicy) { + case Enums::SingleThreaded: + priority_list.push_back(0); + break; + case Enums::RoundRobin: + priority_list = cpu.roundRobinPriority(threadPriority); + break; + case Enums::Random: + priority_list = cpu.randomPriority(); + break; + default: + panic("Unknown fetch policy"); + } + + for (auto tid : priority_list) { + if (cpu.getContext(tid)->status() == ThreadContext::Active && + getInput(tid) && !decodeInfo[tid].blocked) { + threadPriority = tid; + return tid; + } + } + + return InvalidThreadID; } bool Decode::isDrained() { - return inputBuffer.empty() && (*inp.outputWire).isBubble(); + for (const auto &buffer : inputBuffer) { + if (!buffer.empty()) + return false; + } + + return (*inp.outputWire).isBubble(); } void @@ -284,13 +339,13 @@ Decode::minorTrace() const { std::ostringstream data; - if (blocked) + if (decodeInfo[0].blocked) data << 'B'; else (*out.inputWire).reportData(data); MINORTRACE("insts=%s\n", data.str()); - inputBuffer.minorTrace(); + inputBuffer[0].minorTrace(); } } diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh index fcc18fd44..a4d29a59d 100644 --- a/src/cpu/minor/decode.hh +++ b/src/cpu/minor/decode.hh @@ -71,7 +71,7 @@ class Decode : public Named Latch<ForwardInstData>::Input out; /** Interface to reserve space in the next stage */ - Reservable &nextStageReserve; + std::vector<InputBuffer<ForwardInstData>> &nextStageReserve; /** Width of output of this stage/input of next in instructions */ unsigned int outputWidth; @@ -82,43 +82,68 @@ class Decode : public Named public: /* Public for Pipeline to be able to pass it to Fetch2 */ - InputBuffer<ForwardInstData> inputBuffer; + std::vector<InputBuffer<ForwardInstData>> inputBuffer; protected: /** Data members after this line are cycle-to-cycle state */ - /** Index into the inputBuffer's head marking the start of unhandled - * instructions */ - unsigned int inputIndex; + struct DecodeThreadInfo { - /** True when we're in the process of decomposing a micro-op and - * microopPC will be valid. This is only the case when there isn't - * sufficient space in Executes input buffer to take the whole of a - * decomposed instruction and some of that instructions micro-ops must - * be generated in a later cycle */ - bool inMacroop; - TheISA::PCState microopPC; + /** Default Constructor */ + DecodeThreadInfo() : + inputIndex(0), + inMacroop(false), + execSeqNum(InstId::firstExecSeqNum), + blocked(false) + { } - /** Source of execSeqNums to number instructions. */ - InstSeqNum execSeqNum; + DecodeThreadInfo(const DecodeThreadInfo& other) : + inputIndex(other.inputIndex), + inMacroop(other.inMacroop), + execSeqNum(other.execSeqNum), + blocked(other.blocked) + { } - /** Blocked indication for report */ - bool blocked; + + /** Index into the inputBuffer's head marking the start of unhandled + * instructions */ + unsigned int inputIndex; + + /** True when we're in the process of decomposing a micro-op and + * microopPC will be valid. This is only the case when there isn't + * sufficient space in Executes input buffer to take the whole of a + * decomposed instruction and some of that instructions micro-ops must + * be generated in a later cycle */ + bool inMacroop; + TheISA::PCState microopPC; + + /** Source of execSeqNums to number instructions. */ + InstSeqNum execSeqNum; + + /** Blocked indication for report */ + bool blocked; + }; + + std::vector<DecodeThreadInfo> decodeInfo; + ThreadID threadPriority; protected: /** Get a piece of data to work on, or 0 if there is no data. */ - const ForwardInstData *getInput(); + const ForwardInstData *getInput(ThreadID tid); /** Pop an element off the input buffer, if there are any */ - void popInput(); + void popInput(ThreadID tid); + /** Use the current threading policy to determine the next thread to + * decode from. */ + ThreadID getScheduledThread(); public: Decode(const std::string &name, MinorCPU &cpu_, MinorCPUParams ¶ms, Latch<ForwardInstData>::Output inp_, Latch<ForwardInstData>::Input out_, - Reservable &next_stage_input_buffer); + std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer); public: /** Pass on input/buffer data to the output if you can */ diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc index ab08e6b4a..5d54f6913 100644 --- a/src/cpu/minor/dyn_inst.cc +++ b/src/cpu/minor/dyn_inst.cc @@ -52,6 +52,12 @@ namespace Minor { +const InstSeqNum InstId::firstStreamSeqNum; +const InstSeqNum InstId::firstPredictionSeqNum; +const InstSeqNum InstId::firstLineSeqNum; +const InstSeqNum InstId::firstFetchSeqNum; +const InstSeqNum InstId::firstExecSeqNum; + std::ostream & operator <<(std::ostream &os, const InstId &id) { diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index 092ad5a2d..a5d646b6c 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -342,12 +342,17 @@ class ExecContext : public ::ExecContext public: // monitor/mwait funtions - void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); } - bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); } + void armMonitor(Addr address) + { getCpuPtr()->armMonitor(inst->id.threadId, address); } + + bool mwait(PacketPtr pkt) + { return getCpuPtr()->mwait(inst->id.threadId, pkt); } + void mwaitAtomic(ThreadContext *tc) - { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); } + { return getCpuPtr()->mwaitAtomic(inst->id.threadId, tc, thread.dtb); } + AddressMonitor *getAddrMonitor() - { return getCpuPtr()->getCpuAddrMonitor(0); } + { return getCpuPtr()->getCpuAddrMonitor(inst->id.threadId); } }; } diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 4298e1dcc..b13e0c020 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -86,15 +86,10 @@ Execute::Execute(const std::string &name_, params.executeLSQTransfersQueueSize, params.executeLSQStoreBufferSize, params.executeLSQMaxStoreBufferStoresPerCycle), - scoreboard(name_ + ".scoreboard"), - inputBuffer(name_ + ".inputBuffer", "insts", - params.executeInputBufferSize), - inputIndex(0), - lastCommitWasEndOfMacroop(true), - instsBeingCommitted(params.executeCommitLimit), - streamSeqNum(InstId::firstStreamSeqNum), - lastPredictionSeqNum(InstId::firstPredictionSeqNum), - drainState(NotDraining) + executeInfo(params.numThreads, ExecuteThreadInfo(params.executeCommitLimit)), + interruptPriority(0), + issuePriority(0), + commitPriority(0) { if (commitLimit < 1) { fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_, @@ -170,35 +165,50 @@ Execute::Execute(const std::string &name_, } } - inFlightInsts = new Queue<QueuedInst, - ReportTraitsAdaptor<QueuedInst> >( - name_ + ".inFlightInsts", "insts", total_slots); + /* Per-thread structures */ + for (ThreadID tid = 0; tid < params.numThreads; tid++) { + std::string tid_str = std::to_string(tid); - inFUMemInsts = new Queue<QueuedInst, - ReportTraitsAdaptor<QueuedInst> >( - name_ + ".inFUMemInsts", "insts", total_slots); + /* Input Buffers */ + inputBuffer.push_back( + InputBuffer<ForwardInstData>( + name_ + ".inputBuffer" + tid_str, "insts", + params.executeInputBufferSize)); + + /* Scoreboards */ + scoreboard.push_back(Scoreboard(name_ + ".scoreboard" + tid_str)); + + /* In-flight instruction records */ + executeInfo[tid].inFlightInsts = new Queue<QueuedInst, + ReportTraitsAdaptor<QueuedInst> >( + name_ + ".inFlightInsts" + tid_str, "insts", total_slots); + + executeInfo[tid].inFUMemInsts = new Queue<QueuedInst, + ReportTraitsAdaptor<QueuedInst> >( + name_ + ".inFUMemInsts" + tid_str, "insts", total_slots); + } } const ForwardInstData * -Execute::getInput() +Execute::getInput(ThreadID tid) { /* Get a line from the inputBuffer to work with */ - if (!inputBuffer.empty()) { - const ForwardInstData &head = inputBuffer.front(); + if (!inputBuffer[tid].empty()) { + const ForwardInstData &head = inputBuffer[tid].front(); - return (head.isBubble() ? NULL : &(inputBuffer.front())); + return (head.isBubble() ? NULL : &(inputBuffer[tid].front())); } else { return NULL; } } void -Execute::popInput() +Execute::popInput(ThreadID tid) { - if (!inputBuffer.empty()) - inputBuffer.pop(); + if (!inputBuffer[tid].empty()) + inputBuffer[tid].pop(); - inputIndex = 0; + executeInfo[tid].inputIndex = 0; } void @@ -276,11 +286,12 @@ Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch) reason = BranchData::NoBranch; } - updateBranchData(reason, inst, target, branch); + updateBranchData(inst->id.threadId, reason, inst, target, branch); } void Execute::updateBranchData( + ThreadID tid, BranchData::Reason reason, MinorDynInstPtr inst, const TheISA::PCState &target, BranchData &branch) @@ -288,14 +299,15 @@ Execute::updateBranchData( if (reason != BranchData::NoBranch) { /* Bump up the stream sequence number on a real branch*/ if (BranchData::isStreamChange(reason)) - streamSeqNum++; + executeInfo[tid].streamSeqNum++; /* Branches (even mis-predictions) don't change the predictionSeqNum, * just the streamSeqNum */ - branch = BranchData(reason, streamSeqNum, + branch = BranchData(reason, tid, + executeInfo[tid].streamSeqNum, /* Maintaining predictionSeqNum if there's no inst is just a * courtesy and looks better on minorview */ - (inst->isBubble() ? lastPredictionSeqNum + (inst->isBubble() ? executeInfo[tid].lastPredictionSeqNum : inst->id.predictionSeqNum), target, inst); @@ -419,8 +431,9 @@ Execute::takeInterrupt(ThreadID thread_id, BranchData &branch) /* Assume that an interrupt *must* cause a branch. Assert this? */ - updateBranchData(BranchData::Interrupt, MinorDynInst::bubble(), - cpu.getContext(thread_id)->pcState(), branch); + updateBranchData(thread_id, BranchData::Interrupt, + MinorDynInst::bubble(), cpu.getContext(thread_id)->pcState(), + branch); } return interrupt != NoFault; @@ -506,9 +519,10 @@ cyclicIndexDec(unsigned int index, unsigned int cycle_size) } unsigned int -Execute::issue(bool only_issue_microops) +Execute::issue(ThreadID thread_id) { - const ForwardInstData *insts_in = getInput(); + const ForwardInstData *insts_in = getInput(thread_id); + ExecuteThreadInfo &thread = executeInfo[thread_id]; /* Early termination if we have no instructions */ if (!insts_in) @@ -534,8 +548,7 @@ Execute::issue(bool only_issue_microops) unsigned num_insts_discarded = 0; do { - MinorDynInstPtr inst = insts_in->insts[inputIndex]; - ThreadID thread_id = inst->id.threadId; + MinorDynInstPtr inst = insts_in->insts[thread.inputIndex]; Fault fault = inst->fault; bool discarded = false; bool issued_mem_ref = false; @@ -550,21 +563,12 @@ Execute::issue(bool only_issue_microops) " thread\n", *inst); issued = false; - } else if (inst->id.streamSeqNum != streamSeqNum) { + } else if (inst->id.streamSeqNum != thread.streamSeqNum) { DPRINTF(MinorExecute, "Discarding inst: %s as its stream" " state was unexpected, expected: %d\n", - *inst, streamSeqNum); + *inst, thread.streamSeqNum); issued = true; discarded = true; - } else if (fault == NoFault && only_issue_microops && - /* Is this anything other than a non-first microop */ - (!inst->staticInst->isMicroop() || - !inst->staticInst->isFirstMicroop())) - { - DPRINTF(MinorExecute, "Not issuing new non-microop inst: %s\n", - *inst); - - issued = false; } else { /* Try and issue an instruction into an FU, assume we didn't and * fix that in the loop */ @@ -598,9 +602,10 @@ Execute::issue(bool only_issue_microops) /* Mark the destinations for this instruction as * busy */ - scoreboard.markupInstDests(inst, cpu.curCycle() + + scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() + Cycles(0), cpu.getContext(thread_id), false); + DPRINTF(MinorExecute, "Issuing %s to %d\n", inst->id, noCostFUIndex); inst->fuIndex = noCostFUIndex; inst->extraCommitDelay = Cycles(0); inst->extraCommitDelayExpr = NULL; @@ -608,7 +613,7 @@ Execute::issue(bool only_issue_microops) /* Push the instruction onto the inFlight queue so * it can be committed in order */ QueuedInst fu_inst(inst); - inFlightInsts->push(fu_inst); + thread.inFlightInsts->push(fu_inst); issued = true; @@ -644,8 +649,8 @@ Execute::issue(bool only_issue_microops) DPRINTF(MinorExecute, "Can't issue inst: %s as extra" " decoding is suppressing it\n", *inst); - } else if (!scoreboard.canInstIssue(inst, src_latencies, - cant_forward_from_fu_indices, + } else if (!scoreboard[thread_id].canInstIssue(inst, + src_latencies, cant_forward_from_fu_indices, cpu.curCycle(), cpu.getContext(thread_id))) { DPRINTF(MinorExecute, "Can't issue inst: %s yet\n", @@ -687,20 +692,20 @@ Execute::issue(bool only_issue_microops) * early */ if (allowEarlyMemIssue) { inst->instToWaitFor = - scoreboard.execSeqNumToWaitFor(inst, + scoreboard[thread_id].execSeqNumToWaitFor(inst, cpu.getContext(thread_id)); - if (lsq.getLastMemBarrier() > + if (lsq.getLastMemBarrier(thread_id) > inst->instToWaitFor) { DPRINTF(MinorExecute, "A barrier will" " cause a delay in mem ref issue of" " inst: %s until after inst" " %d(exec)\n", *inst, - lsq.getLastMemBarrier()); + lsq.getLastMemBarrier(thread_id)); inst->instToWaitFor = - lsq.getLastMemBarrier(); + lsq.getLastMemBarrier(thread_id); } else { DPRINTF(MinorExecute, "Memory ref inst:" " %s must wait for inst %d(exec)" @@ -714,7 +719,7 @@ Execute::issue(bool only_issue_microops) * queue to ensure in-order issue to the LSQ */ DPRINTF(MinorExecute, "Pushing mem inst: %s\n", *inst); - inFUMemInsts->push(fu_inst); + thread.inFUMemInsts->push(fu_inst); } /* Issue to FU */ @@ -725,7 +730,7 @@ Execute::issue(bool only_issue_microops) /* Mark the destinations for this instruction as * busy */ - scoreboard.markupInstDests(inst, cpu.curCycle() + + scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() + fu->description.opLat + extra_dest_retire_lat + extra_assumed_lat, @@ -734,7 +739,7 @@ Execute::issue(bool only_issue_microops) /* Push the instruction onto the inFlight queue so * it can be committed in order */ - inFlightInsts->push(fu_inst); + thread.inFlightInsts->push(fu_inst); issued = true; } @@ -777,24 +782,24 @@ Execute::issue(bool only_issue_microops) DPRINTF(MinorExecute, "Reached inst issue limit\n"); } - inputIndex++; + thread.inputIndex++; DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n", - inputIndex); + thread.inputIndex); } /* Got to the end of a line */ - if (inputIndex == insts_in->width()) { - popInput(); + if (thread.inputIndex == insts_in->width()) { + popInput(thread_id); /* Set insts_in to null to force us to leave the surrounding * loop */ insts_in = NULL; if (processMoreThanOneInput) { DPRINTF(MinorExecute, "Wrapping\n"); - insts_in = getInput(); + insts_in = getInput(thread_id); } } - } while (insts_in && inputIndex < insts_in->width() && + } while (insts_in && thread.inputIndex < insts_in->width() && /* We still have instructions */ fu_index != numFuncUnits && /* Not visited all FUs */ issued && /* We've not yet failed to issue an instruction */ @@ -805,9 +810,9 @@ Execute::issue(bool only_issue_microops) } bool -Execute::tryPCEvents() +Execute::tryPCEvents(ThreadID thread_id) { - ThreadContext *thread = cpu.getContext(0); + ThreadContext *thread = cpu.getContext(thread_id); unsigned int num_pc_event_checks = 0; /* Handle PC events on instructions */ @@ -934,6 +939,11 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, " there isn't space in the store buffer\n", *inst); completed_inst = false; + } else if (inst->isInst() && inst->staticInst->isQuiesce() + && !branch.isBubble()){ + /* This instruction can suspend, need to be able to communicate + * backwards, so no other branches may evaluate this cycle*/ + completed_inst = false; } else { ExecContext context(cpu, *cpu.threads[thread_id], *this, inst); @@ -962,7 +972,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, /* Keep a copy of this instruction's predictionSeqNum just in case * we need to issue a branch without an instruction (such as an * interrupt) */ - lastPredictionSeqNum = inst->id.predictionSeqNum; + executeInfo[thread_id].lastPredictionSeqNum = inst->id.predictionSeqNum; /* Check to see if this instruction suspended the current thread. */ if (!inst->isFault() && @@ -971,17 +981,17 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, !isInterrupted(thread_id)) /* Don't suspend if we have interrupts */ { - TheISA::PCState resume_pc = cpu.getContext(0)->pcState(); + TheISA::PCState resume_pc = cpu.getContext(thread_id)->pcState(); assert(resume_pc.microPC() == 0); DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" - " inst: %s\n", inst->id.threadId, *inst); + " inst: %s\n", thread_id, *inst); cpu.stats.numFetchSuspends++; - updateBranchData(BranchData::SuspendThread, inst, resume_pc, - branch); + updateBranchData(thread_id, BranchData::SuspendThread, inst, + resume_pc, branch); } } @@ -989,10 +999,12 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, } void -Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) +Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, + BranchData &branch) { Fault fault = NoFault; Cycles now = cpu.curCycle(); + ExecuteThreadInfo &ex_info = executeInfo[thread_id]; /** * Try and execute as many instructions from the end of FU pipelines as @@ -1030,13 +1042,13 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) * memCommitLimit */ unsigned int num_mem_refs_committed = 0; - if (only_commit_microops && !inFlightInsts->empty()) { + if (only_commit_microops && !ex_info.inFlightInsts->empty()) { DPRINTF(MinorInterrupt, "Only commit microops %s %d\n", - *(inFlightInsts->front().inst), - lastCommitWasEndOfMacroop); + *(ex_info.inFlightInsts->front().inst), + ex_info.lastCommitWasEndOfMacroop); } - while (!inFlightInsts->empty() && /* Some more instructions to process */ + while (!ex_info.inFlightInsts->empty() && /* Some more instructions to process */ !branch.isStreamChange() && /* No real branch */ fault == NoFault && /* No faults */ completed_inst && /* Still finding instructions to execute */ @@ -1046,10 +1058,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) if (only_commit_microops) { DPRINTF(MinorInterrupt, "Committing tail of insts before" " interrupt: %s\n", - *(inFlightInsts->front().inst)); + *(ex_info.inFlightInsts->front().inst)); } - QueuedInst *head_inflight_inst = &(inFlightInsts->front()); + QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front()); InstSeqNum head_exec_seq_num = head_inflight_inst->inst->id.execSeqNum; @@ -1071,8 +1083,8 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) /* If we're just completing a macroop before an interrupt or drain, * can we stil commit another microop (rather than a memory response) * without crosing into the next full instruction? */ - bool can_commit_insts = !inFlightInsts->empty() && - !(only_commit_microops && lastCommitWasEndOfMacroop); + bool can_commit_insts = !ex_info.inFlightInsts->empty() && + !(only_commit_microops && ex_info.lastCommitWasEndOfMacroop); /* Can we find a mem response for this inst */ LSQ::LSQRequestPtr mem_response = @@ -1082,18 +1094,18 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) can_commit_insts); /* Test for PC events after every instruction */ - if (isInbetweenInsts() && tryPCEvents()) { - ThreadContext *thread = cpu.getContext(0); + if (isInbetweenInsts(thread_id) && tryPCEvents(thread_id)) { + ThreadContext *thread = cpu.getContext(thread_id); /* Branch as there was a change in PC */ - updateBranchData(BranchData::UnpredictedBranch, + updateBranchData(thread_id, BranchData::UnpredictedBranch, MinorDynInst::bubble(), thread->pcState(), branch); } else if (mem_response && num_mem_refs_committed < memoryCommitLimit) { /* Try to commit from the memory responses next */ - discard_inst = inst->id.streamSeqNum != streamSeqNum || - discard; + discard_inst = inst->id.streamSeqNum != + ex_info.streamSeqNum || discard; DPRINTF(MinorExecute, "Trying to commit mem response: %s\n", *inst); @@ -1102,7 +1114,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) if (discard_inst) { DPRINTF(MinorExecute, "Discarding mem inst: %s as its" " stream state was unexpected, expected: %d\n", - *inst, streamSeqNum); + *inst, ex_info.streamSeqNum); lsq.popResponse(mem_response); } else { @@ -1128,11 +1140,11 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) * For any other case, leave it to the normal instruction * issue below to handle them. */ - if (!inFUMemInsts->empty() && lsq.canRequest()) { + if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) { DPRINTF(MinorExecute, "Trying to commit from mem FUs\n"); const MinorDynInstPtr head_mem_ref_inst = - inFUMemInsts->front().inst; + ex_info.inFUMemInsts->front().inst; FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex]; const MinorDynInstPtr &fu_inst = fu->front().inst; @@ -1141,7 +1153,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) if (!fu_inst->isBubble() && !fu_inst->inLSQ && fu_inst->canEarlyIssue && - streamSeqNum == fu_inst->id.streamSeqNum && + ex_info.streamSeqNum == fu_inst->id.streamSeqNum && head_exec_seq_num > fu_inst->instToWaitFor) { DPRINTF(MinorExecute, "Issuing mem ref early" @@ -1184,7 +1196,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) * actually at the end of its pipeline * Future instruction: handled above and only for * mem refs on their way to the LSQ */ - } else /* if (fu_inst_seq_num == head_exec_seq_num) */ { + } else if (fu_inst.inst->id == inst->id) { /* All instructions can be committed if they have the * right execSeqNum and there are no in-flight * mem insts before us */ @@ -1194,8 +1206,8 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) } if (try_to_commit) { - discard_inst = inst->id.streamSeqNum != streamSeqNum || - discard; + discard_inst = inst->id.streamSeqNum != + ex_info.streamSeqNum || discard; /* Is this instruction discardable as its streamSeqNum * doesn't match? */ @@ -1209,8 +1221,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) DPRINTF(MinorExecute, "Evaluating expression for" " extra commit delay inst: %s\n", *inst); - ThreadContext *thread = - cpu.getContext(inst->id.threadId); + ThreadContext *thread = cpu.getContext(thread_id); TimingExprEvalContext context(inst->staticInst, thread, NULL); @@ -1241,9 +1252,9 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) /* @todo Think about making lastMemBarrier be * MAX_UINT_64 to avoid using 0 as a marker value */ if (!inst->isFault() && inst->isMemRef() && - lsq.getLastMemBarrier() < + lsq.getLastMemBarrier(thread_id) < inst->id.execSeqNum && - lsq.getLastMemBarrier() != 0) + lsq.getLastMemBarrier(thread_id) != 0) { DPRINTF(MinorExecute, "Not committing inst: %s yet" " as there are incomplete barriers in flight\n", @@ -1269,8 +1280,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) * instruction wasn't the inFlightInsts head * but had already been committed, it would have * unstalled the pipeline before here */ - if (inst->fuIndex != noCostFUIndex) + if (inst->fuIndex != noCostFUIndex) { + DPRINTF(MinorExecute, "Unstalling %d for inst %s\n", inst->fuIndex, inst->id); funcUnits[inst->fuIndex]->stalled = false; + } } } } else { @@ -1286,7 +1299,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) if (discard_inst) { DPRINTF(MinorExecute, "Discarding inst: %s as its stream" " state was unexpected, expected: %d\n", - *inst, streamSeqNum); + *inst, ex_info.streamSeqNum); if (fault == NoFault) cpu.stats.numDiscardedOps++; @@ -1303,10 +1316,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) if (completed_inst && inst->isMemRef()) { /* The MemRef could have been discarded from the FU or the memory * queue, so just check an FU instruction */ - if (!inFUMemInsts->empty() && - inFUMemInsts->front().inst == inst) + if (!ex_info.inFUMemInsts->empty() && + ex_info.inFUMemInsts->front().inst == inst) { - inFUMemInsts->pop(); + ex_info.inFUMemInsts->pop(); } } @@ -1315,16 +1328,16 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) DPRINTF(MinorExecute, "Completed inst: %s\n", *inst); /* Got to the end of a full instruction? */ - lastCommitWasEndOfMacroop = inst->isFault() || + ex_info.lastCommitWasEndOfMacroop = inst->isFault() || inst->isLastOpInInst(); /* lastPredictionSeqNum is kept as a convenience to prevent its * value from changing too much on the minorview display */ - lastPredictionSeqNum = inst->id.predictionSeqNum; + ex_info.lastPredictionSeqNum = inst->id.predictionSeqNum; /* Finished with the inst, remove it from the inst queue and * clear its dependencies */ - inFlightInsts->pop(); + ex_info.inFlightInsts->pop(); /* Complete barriers in the LSQ/move to store buffer */ if (inst->isInst() && inst->staticInst->isMemBarrier()) { @@ -1333,7 +1346,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) lsq.completeMemBarrierInst(inst, committed_inst); } - scoreboard.clearInstDests(inst, inst->isMemRef()); + scoreboard[thread_id].clearInstDests(inst, inst->isMemRef()); } /* Handle per-cycle instruction counting */ @@ -1343,7 +1356,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) /* Don't show no cost instructions as having taken a commit * slot */ if (DTRACE(MinorTrace) && !is_no_cost_inst) - instsBeingCommitted.insts[num_insts_committed] = inst; + ex_info.instsBeingCommitted.insts[num_insts_committed] = inst; if (!is_no_cost_inst) num_insts_committed++; @@ -1369,124 +1382,112 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) } bool -Execute::isInbetweenInsts() const +Execute::isInbetweenInsts(ThreadID thread_id) const { - return lastCommitWasEndOfMacroop && + return executeInfo[thread_id].lastCommitWasEndOfMacroop && !lsq.accessesInFlight(); } void Execute::evaluate() { - inputBuffer.setTail(*inp.outputWire); + if (!inp.outputWire->isBubble()) + inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire); + BranchData &branch = *out.inputWire; - const ForwardInstData *insts_in = getInput(); + unsigned int num_issued = 0; /* Do all the cycle-wise activities for dcachePort here to potentially * free up input spaces in the LSQ's requests queue */ lsq.step(); - /* Has an interrupt been signalled? This may not be acted on - * straighaway so this is different from took_interrupt below */ + /* Check interrupts first. Will halt commit if interrupt found */ bool interrupted = false; - /* If there was an interrupt signalled, was it acted on now? */ - bool took_interrupt = false; - - if (cpu.getInterruptController(0)) { - /* This is here because it seems that after drainResume the - * interrupt controller isn't always set */ - interrupted = drainState == NotDraining && isInterrupted(0); - } else { - DPRINTF(MinorInterrupt, "No interrupt controller\n"); - } + ThreadID interrupt_tid = checkInterrupts(branch, interrupted); - unsigned int num_issued = 0; - - if (DTRACE(MinorTrace)) { - /* Empty the instsBeingCommitted for MinorTrace */ - instsBeingCommitted.bubbleFill(); - } - - /* THREAD threadId on isInterrupted */ - /* Act on interrupts */ - if (interrupted && isInbetweenInsts()) { - took_interrupt = takeInterrupt(0, branch); - /* Clear interrupted if no interrupt was actually waiting */ - interrupted = took_interrupt; - } - - if (took_interrupt) { - /* Do no commit/issue this cycle */ + if (interrupt_tid != InvalidThreadID) { + /* Signalling an interrupt this cycle, not issuing/committing from + * any other threads */ } else if (!branch.isBubble()) { /* It's important that this is here to carry Fetch1 wakeups to Fetch1 * without overwriting them */ DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old" " branch to complete\n"); } else { - if (interrupted) { - if (inFlightInsts->empty()) { - DPRINTF(MinorInterrupt, "Waiting but no insts\n"); + ThreadID commit_tid = getCommittingThread(); + + if (commit_tid != InvalidThreadID) { + ExecuteThreadInfo& commit_info = executeInfo[commit_tid]; + + DPRINTF(MinorExecute, "Attempting to commit [tid:%d]\n", + commit_tid); + /* commit can set stalled flags observable to issue and so *must* be + * called first */ + if (commit_info.drainState != NotDraining) { + if (commit_info.drainState == DrainCurrentInst) { + /* Commit only micro-ops, don't kill anything else */ + commit(commit_tid, true, false, branch); + + if (isInbetweenInsts(commit_tid)) + setDrainState(commit_tid, DrainHaltFetch); + + /* Discard any generated branch */ + branch = BranchData::bubble(); + } else if (commit_info.drainState == DrainAllInsts) { + /* Kill all instructions */ + while (getInput(commit_tid)) + popInput(commit_tid); + commit(commit_tid, false, true, branch); + } } else { - DPRINTF(MinorInterrupt, "Waiting for end of inst before" - " signalling interrupt\n"); + /* Commit micro-ops only if interrupted. Otherwise, commit + * anything you like */ + DPRINTF(MinorExecute, "Committing micro-ops for interrupt[tid:%d]\n", + commit_tid); + bool only_commit_microops = interrupted && + hasInterrupt(commit_tid); + commit(commit_tid, only_commit_microops, false, branch); } - } - /* commit can set stalled flags observable to issue and so *must* be - * called first */ - if (drainState != NotDraining) { - if (drainState == DrainCurrentInst) { - /* Commit only micro-ops, don't kill anything else */ - commit(true, false, branch); - - if (isInbetweenInsts()) - setDrainState(DrainHaltFetch); - - /* Discard any generated branch */ - branch = BranchData::bubble(); - } else if (drainState == DrainAllInsts) { - /* Kill all instructions */ - while (getInput()) - popInput(); - commit(false, true, branch); + /* Halt fetch, but don't do it until we have the current instruction in + * the bag */ + if (commit_info.drainState == DrainHaltFetch) { + updateBranchData(commit_tid, BranchData::HaltFetch, + MinorDynInst::bubble(), TheISA::PCState(0), branch); + + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + setDrainState(commit_tid, DrainAllInsts); } - } else { - /* Commit micro-ops only if interrupted. Otherwise, commit - * anything you like */ - commit(interrupted, false, branch); } - + ThreadID issue_tid = getIssuingThread(); /* This will issue merrily even when interrupted in the sure and * certain knowledge that the interrupt with change the stream */ - if (insts_in) - num_issued = issue(false); - } - - /* Halt fetch, but don't do it until we have the current instruction in - * the bag */ - if (drainState == DrainHaltFetch) { - updateBranchData(BranchData::HaltFetch, MinorDynInst::bubble(), - TheISA::PCState(0), branch); + if (issue_tid != InvalidThreadID) { + DPRINTF(MinorExecute, "Attempting to issue [tid:%d]\n", + issue_tid); + num_issued = issue(issue_tid); + } - cpu.wakeupOnEvent(Pipeline::ExecuteStageId); - setDrainState(DrainAllInsts); } - MinorDynInstPtr next_issuable_inst = NULL; + /* Run logic to step functional units + decide if we are active on the next + * clock cycle */ + std::vector<MinorDynInstPtr> next_issuable_insts; bool can_issue_next = false; - /* Find the next issuable instruction and see if it can be issued */ - if (getInput()) { - MinorDynInstPtr inst = getInput()->insts[inputIndex]; - - if (inst->isFault()) { - can_issue_next = true; - } else if (!inst->isBubble()) { - if (cpu.getContext(inst->id.threadId)->status() != - ThreadContext::Suspended) - { - next_issuable_inst = inst; + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + /* Find the next issuable instruction for each thread and see if it can + be issued */ + if (getInput(tid)) { + unsigned int input_index = executeInfo[tid].inputIndex; + MinorDynInstPtr inst = getInput(tid)->insts[input_index]; + if (inst->isFault()) { + can_issue_next = true; + } else if (!inst->isBubble()) { + if (cpu.getContext(tid)->status() != ThreadContext::Suspended) { + next_issuable_insts.push_back(inst); + } } } } @@ -1494,53 +1495,56 @@ Execute::evaluate() bool becoming_stalled = true; /* Advance the pipelines and note whether they still need to be - * advanced */ + * advanced */ for (unsigned int i = 0; i < numFuncUnits; i++) { FUPipeline *fu = funcUnits[i]; - fu->advance(); - /* If we need to go again, the pipeline will have been left or set - * to be unstalled */ - if (fu->occupancy != 0 && !fu->stalled) + /* If we need to tick again, the pipeline will have been left or set + * to be unstalled */ + if (fu->occupancy !=0 && !fu->stalled) becoming_stalled = false; - /* Could we possibly issue the next instruction? This is quite - * an expensive test */ - if (next_issuable_inst && !fu->stalled && - scoreboard.canInstIssue(next_issuable_inst, - NULL, NULL, cpu.curCycle() + Cycles(1), - cpu.getContext(next_issuable_inst->id.threadId)) && - fu->provides(next_issuable_inst->staticInst->opClass())) - { - can_issue_next = true; + /* Could we possibly issue the next instruction from any thread? + * This is quite an expensive test and is only used to determine + * if the CPU should remain active, only run it if we aren't sure + * we are active next cycle yet */ + for (auto inst : next_issuable_insts) { + if (!fu->stalled && fu->provides(inst->staticInst->opClass()) && + scoreboard[inst->id.threadId].canInstIssue(inst, + NULL, NULL, cpu.curCycle() + Cycles(1), + cpu.getContext(inst->id.threadId))) { + can_issue_next = true; + break; + } } } bool head_inst_might_commit = false; /* Could the head in flight insts be committed */ - if (!inFlightInsts->empty()) { - const QueuedInst &head_inst = inFlightInsts->front(); + for (auto const &info : executeInfo) { + if (!info.inFlightInsts->empty()) { + const QueuedInst &head_inst = info.inFlightInsts->front(); - if (head_inst.inst->isNoCostInst()) { - head_inst_might_commit = true; - } else { - FUPipeline *fu = funcUnits[head_inst.inst->fuIndex]; - - /* Head inst is commitable */ - if ((fu->stalled && - fu->front().inst->id == head_inst.inst->id) || - lsq.findResponse(head_inst.inst)) - { + if (head_inst.inst->isNoCostInst()) { head_inst_might_commit = true; + } else { + FUPipeline *fu = funcUnits[head_inst.inst->fuIndex]; + if ((fu->stalled && + fu->front().inst->id == head_inst.inst->id) || + lsq.findResponse(head_inst.inst)) + { + head_inst_might_commit = true; + break; + } } } } DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n", (num_issued != 0 ? " (issued some insts)" : ""), - (becoming_stalled ? " (becoming stalled)" : "(not becoming stalled)"), + (becoming_stalled ? "(becoming stalled)" : "(not becoming stalled)"), (can_issue_next ? " (can issued next inst)" : ""), (head_inst_might_commit ? "(head inst might commit)" : ""), (lsq.needsToTick() ? " (LSQ needs to tick)" : ""), @@ -1568,36 +1572,54 @@ Execute::evaluate() cpu.activityRecorder->activity(); /* Make sure the input (if any left) is pushed */ - inputBuffer.pushTail(); + if (!inp.outputWire->isBubble()) + inputBuffer[inp.outputWire->threadId].pushTail(); } -void -Execute::wakeupFetch(BranchData::Reason reason) +ThreadID +Execute::checkInterrupts(BranchData& branch, bool& interrupted) { - BranchData branch; - assert(branch.isBubble()); - - /* THREAD thread id */ - ThreadContext *thread = cpu.getContext(0); - - /* Force a branch to the current PC (which should be the next inst.) to - * wake up Fetch1 */ - if (!branch.isStreamChange() /* No real branch already happened */) { - DPRINTF(MinorInterrupt, "Waking up Fetch (via Execute) by issuing" - " a branch: %s\n", thread->pcState()); + ThreadID tid = interruptPriority; + /* Evaluate interrupts in round-robin based upon service */ + do { + /* Has an interrupt been signalled? This may not be acted on + * straighaway so this is different from took_interrupt */ + bool thread_interrupted = false; + + if (FullSystem && cpu.getInterruptController(tid)) { + /* This is here because it seems that after drainResume the + * interrupt controller isn't always set */ + thread_interrupted = executeInfo[tid].drainState == NotDraining && + isInterrupted(tid); + interrupted = interrupted || thread_interrupted; + } else { + DPRINTF(MinorInterrupt, "No interrupt controller\n"); + } + DPRINTF(MinorInterrupt, "[tid:%d] thread_interrupted?=%d isInbetweenInsts?=%d\n", + tid, thread_interrupted, isInbetweenInsts(tid)); + /* Act on interrupts */ + if (thread_interrupted && isInbetweenInsts(tid)) { + if (takeInterrupt(tid, branch)) { + interruptPriority = tid; + return tid; + } + } else { + tid = (tid + 1) % cpu.numThreads; + } + } while (tid != interruptPriority); - assert(thread->pcState().microPC() == 0); + return InvalidThreadID; +} - updateBranchData(reason, - MinorDynInst::bubble(), thread->pcState(), branch); - } else { - DPRINTF(MinorInterrupt, "Already branching, no need for wakeup\n"); +bool +Execute::hasInterrupt(ThreadID thread_id) +{ + if (FullSystem && cpu.getInterruptController(thread_id)) { + return executeInfo[thread_id].drainState == NotDraining && + isInterrupted(thread_id); } - *out.inputWire = branch; - - /* Make sure we get ticked */ - cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + return false; } void @@ -1606,10 +1628,10 @@ Execute::minorTrace() const std::ostringstream insts; std::ostringstream stalled; - instsBeingCommitted.reportData(insts); + executeInfo[0].instsBeingCommitted.reportData(insts); lsq.minorTrace(); - inputBuffer.minorTrace(); - scoreboard.minorTrace(); + inputBuffer[0].minorTrace(); + scoreboard[0].minorTrace(); /* Report functional unit stalling in one string */ unsigned int i = 0; @@ -1623,14 +1645,110 @@ Execute::minorTrace() const MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d" " stalled=%s drainState=%d isInbetweenInsts=%d\n", - insts.str(), inputIndex, streamSeqNum, stalled.str(), drainState, - isInbetweenInsts()); + insts.str(), executeInfo[0].inputIndex, executeInfo[0].streamSeqNum, + stalled.str(), executeInfo[0].drainState, isInbetweenInsts(0)); std::for_each(funcUnits.begin(), funcUnits.end(), std::mem_fun(&FUPipeline::minorTrace)); - inFlightInsts->minorTrace(); - inFUMemInsts->minorTrace(); + executeInfo[0].inFlightInsts->minorTrace(); + executeInfo[0].inFUMemInsts->minorTrace(); +} + +inline ThreadID +Execute::getCommittingThread() +{ + std::vector<ThreadID> priority_list; + + switch (cpu.threadPolicy) { + case Enums::SingleThreaded: + return 0; + case Enums::RoundRobin: + priority_list = cpu.roundRobinPriority(commitPriority); + break; + case Enums::Random: + priority_list = cpu.randomPriority(); + break; + default: + panic("Invalid thread policy"); + } + + for (auto tid : priority_list) { + ExecuteThreadInfo &ex_info = executeInfo[tid]; + bool can_commit_insts = !ex_info.inFlightInsts->empty(); + if (can_commit_insts) { + QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front()); + MinorDynInstPtr inst = head_inflight_inst->inst; + + can_commit_insts = can_commit_insts && + (!inst->inLSQ || (lsq.findResponse(inst) != NULL)); + + if (!inst->inLSQ) { + bool can_transfer_mem_inst = false; + if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) { + const MinorDynInstPtr head_mem_ref_inst = + ex_info.inFUMemInsts->front().inst; + FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex]; + const MinorDynInstPtr &fu_inst = fu->front().inst; + can_transfer_mem_inst = + !fu_inst->isBubble() && + fu_inst->id.threadId == tid && + !fu_inst->inLSQ && + fu_inst->canEarlyIssue && + inst->id.execSeqNum > fu_inst->instToWaitFor; + } + + bool can_execute_fu_inst = inst->fuIndex == noCostFUIndex; + if (can_commit_insts && !can_transfer_mem_inst && + inst->fuIndex != noCostFUIndex) + { + QueuedInst& fu_inst = funcUnits[inst->fuIndex]->front(); + can_execute_fu_inst = !fu_inst.inst->isBubble() && + fu_inst.inst->id == inst->id; + } + + can_commit_insts = can_commit_insts && + (can_transfer_mem_inst || can_execute_fu_inst); + } + } + + + if (can_commit_insts) { + commitPriority = tid; + return tid; + } + } + + return InvalidThreadID; +} + +inline ThreadID +Execute::getIssuingThread() +{ + std::vector<ThreadID> priority_list; + + switch (cpu.threadPolicy) { + case Enums::SingleThreaded: + return 0; + case Enums::RoundRobin: + priority_list = cpu.roundRobinPriority(issuePriority); + break; + case Enums::Random: + priority_list = cpu.randomPriority(); + break; + default: + panic("Invalid thread scheduling policy."); + } + + for (auto tid : priority_list) { + if (cpu.getContext(tid)->status() == ThreadContext::Active && + getInput(tid)) { + issuePriority = tid; + return tid; + } + } + + return InvalidThreadID; } void @@ -1638,11 +1756,10 @@ Execute::drainResume() { DPRINTF(Drain, "MinorExecute drainResume\n"); - setDrainState(NotDraining); + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + setDrainState(tid, NotDraining); + } - /* Wakeup fetch and keep the pipeline running until that branch takes - * effect */ - wakeupFetch(BranchData::WakeupFetch); cpu.wakeupOnEvent(Pipeline::ExecuteStageId); } @@ -1671,10 +1788,10 @@ std::ostream &operator <<(std::ostream &os, Execute::DrainState state) } void -Execute::setDrainState(DrainState state) +Execute::setDrainState(ThreadID thread_id, DrainState state) { - DPRINTF(Drain, "setDrainState: %s\n", state); - drainState = state; + DPRINTF(Drain, "setDrainState[%d]: %s\n", thread_id, state); + executeInfo[thread_id].drainState = state; } unsigned int @@ -1682,29 +1799,39 @@ Execute::drain() { DPRINTF(Drain, "MinorExecute drain\n"); - if (drainState == NotDraining) { - cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + if (executeInfo[tid].drainState == NotDraining) { + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); - /* Go to DrainCurrentInst if we're between microops - * or waiting on an unbufferable memory operation. - * Otherwise we can go straight to DrainHaltFetch - */ - if (isInbetweenInsts()) - setDrainState(DrainHaltFetch); - else - setDrainState(DrainCurrentInst); + /* Go to DrainCurrentInst if we're between microops + * or waiting on an unbufferable memory operation. + * Otherwise we can go straight to DrainHaltFetch + */ + if (isInbetweenInsts(tid)) + setDrainState(tid, DrainHaltFetch); + else + setDrainState(tid, DrainCurrentInst); + } } - return (isDrained() ? 0 : 1); } bool Execute::isDrained() { - return drainState == DrainAllInsts && - inputBuffer.empty() && - inFlightInsts->empty() && - lsq.isDrained(); + if (!lsq.isDrained()) + return false; + + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + if (executeInfo[tid].drainState != DrainAllInsts || + !inputBuffer[tid].empty() || + !executeInfo[tid].inFlightInsts->empty()) { + + return false; + } + } + + return true; } Execute::~Execute() @@ -1712,13 +1839,14 @@ Execute::~Execute() for (unsigned int i = 0; i < numFuncUnits; i++) delete funcUnits[i]; - delete inFlightInsts; + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) + delete executeInfo[tid].inFlightInsts; } bool Execute::instIsRightStream(MinorDynInstPtr inst) { - return inst->id.streamSeqNum == streamSeqNum; + return inst->id.streamSeqNum == executeInfo[inst->id.threadId].streamSeqNum; } bool @@ -1726,8 +1854,8 @@ Execute::instIsHeadInst(MinorDynInstPtr inst) { bool ret = false; - if (!inFlightInsts->empty()) - ret = inFlightInsts->front().inst->id == inst->id; + if (!executeInfo[inst->id.threadId].inFlightInsts->empty()) + ret = executeInfo[inst->id.threadId].inFlightInsts->front().inst->id == inst->id; return ret; } diff --git a/src/cpu/minor/execute.hh b/src/cpu/minor/execute.hh index 8cd026534..165a5bae4 100644 --- a/src/cpu/minor/execute.hh +++ b/src/cpu/minor/execute.hh @@ -116,13 +116,13 @@ class Execute : public Named LSQ lsq; /** Scoreboard of instruction dependencies */ - Scoreboard scoreboard; + std::vector<Scoreboard> scoreboard; /** The execution functional units */ std::vector<FUPipeline *> funcUnits; public: /* Public for Pipeline to be able to pass it to Decode */ - InputBuffer<ForwardInstData> inputBuffer; + std::vector<InputBuffer<ForwardInstData>> inputBuffer; protected: /** Stage cycle-by-cycle state */ @@ -143,48 +143,75 @@ class Execute : public Named DrainAllInsts /* Discarding all remaining insts */ }; - /** In-order instructions either in FUs or the LSQ */ - Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts; - - /** Memory ref instructions still in the FUs */ - Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts; - - /** Index that we've completed upto in getInput data. We can say we're - * popInput when this equals getInput()->width() */ - unsigned int inputIndex; - - /** The last commit was the end of a full instruction so an interrupt - * can safely happen */ - bool lastCommitWasEndOfMacroop; - - /** Structure for reporting insts currently being processed/retired - * for MinorTrace */ - ForwardInstData instsBeingCommitted; - - /** Source of sequence number for instuction streams. Increment this and - * pass to fetch whenever an instruction stream needs to be changed. - * For any more complicated behaviour (e.g. speculation) there'll need - * to be another plan. THREAD, need one for each thread */ - InstSeqNum streamSeqNum; + struct ExecuteThreadInfo { + /** Constructor */ + ExecuteThreadInfo(unsigned int insts_committed) : + inputIndex(0), + lastCommitWasEndOfMacroop(true), + instsBeingCommitted(insts_committed), + streamSeqNum(InstId::firstStreamSeqNum), + lastPredictionSeqNum(InstId::firstPredictionSeqNum), + drainState(NotDraining) + { } + + ExecuteThreadInfo(const ExecuteThreadInfo& other) : + inputIndex(other.inputIndex), + lastCommitWasEndOfMacroop(other.lastCommitWasEndOfMacroop), + instsBeingCommitted(other.instsBeingCommitted), + streamSeqNum(other.streamSeqNum), + lastPredictionSeqNum(other.lastPredictionSeqNum), + drainState(other.drainState) + { } + + /** In-order instructions either in FUs or the LSQ */ + Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts; + + /** Memory ref instructions still in the FUs */ + Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts; + + /** Index that we've completed upto in getInput data. We can say we're + * popInput when this equals getInput()->width() */ + unsigned int inputIndex; + + /** The last commit was the end of a full instruction so an interrupt + * can safely happen */ + bool lastCommitWasEndOfMacroop; + + /** Structure for reporting insts currently being processed/retired + * for MinorTrace */ + ForwardInstData instsBeingCommitted; + + /** Source of sequence number for instuction streams. Increment this and + * pass to fetch whenever an instruction stream needs to be changed. + * For any more complicated behaviour (e.g. speculation) there'll need + * to be another plan. */ + InstSeqNum streamSeqNum; + + /** A prediction number for use where one isn't available from an + * instruction. This is harvested from committed instructions. + * This isn't really needed as the streamSeqNum will change on + * a branch, but it minimises disruption in stream identification */ + InstSeqNum lastPredictionSeqNum; + + /** State progression for draining NotDraining -> ... -> DrainAllInsts */ + DrainState drainState; + }; - /** A prediction number for use where one isn't available from an - * instruction. This is harvested from committed instructions. - * This isn't really needed as the streamSeqNum will change on - * a branch, but it minimises disruption in stream identification */ - InstSeqNum lastPredictionSeqNum; + std::vector<ExecuteThreadInfo> executeInfo; - /** State progression for draining NotDraining -> ... -> DrainAllInsts */ - DrainState drainState; + ThreadID interruptPriority; + ThreadID issuePriority; + ThreadID commitPriority; protected: friend std::ostream &operator <<(std::ostream &os, DrainState state); /** Get a piece of data to work on from the inputBuffer, or 0 if there * is no data. */ - const ForwardInstData *getInput(); + const ForwardInstData *getInput(ThreadID tid); /** Pop an element off the input buffer, if there are any */ - void popInput(); + void popInput(ThreadID tid); /** Generate Branch data based (into branch) on an observed (or not) * change in PC while executing an instruction. @@ -193,7 +220,7 @@ class Execute : public Named /** Actually create a branch to communicate to Fetch1/Fetch2 and, * if that is a stream-changing branch update the streamSeqNum */ - void updateBranchData(BranchData::Reason reason, + void updateBranchData(ThreadID tid, BranchData::Reason reason, MinorDynInstPtr inst, const TheISA::PCState &target, BranchData &branch); @@ -224,23 +251,32 @@ class Execute : public Named bool isInterrupted(ThreadID thread_id) const; /** Are we between instructions? Can we be interrupted? */ - bool isInbetweenInsts() const; + bool isInbetweenInsts(ThreadID thread_id) const; /** Act on an interrupt. Returns true if an interrupt was actually * signalled and invoked */ bool takeInterrupt(ThreadID thread_id, BranchData &branch); /** Try and issue instructions from the inputBuffer */ - unsigned int issue(bool only_issue_microops); + unsigned int issue(ThreadID thread_id); /** Try to act on PC-related events. Returns true if any were * executed */ - bool tryPCEvents(); + bool tryPCEvents(ThreadID thread_id); /** Do the stats handling and instruction count and PC event events * related to the new instruction/op counts */ void doInstCommitAccounting(MinorDynInstPtr inst); + /** Check all threads for possible interrupts. If interrupt is taken, + * returns the tid of the thread. interrupted is set if any thread + * has an interrupt, irrespective of if it is taken */ + ThreadID checkInterrupts(BranchData& branch, bool& interrupted); + + /** Checks if a specific thread has an interrupt. No action is taken. + * this is used for determining if a thread should only commit microops */ + bool hasInterrupt(ThreadID thread_id); + /** Commit a single instruction. Returns true if the instruction being * examined was completed (fully executed, discarded, or initiated a * memory access), false if there is still some processing to do. @@ -266,10 +302,16 @@ class Execute : public Named * If discard is true then discard all instructions rather than * committing. * branch is set to any branch raised during commit. */ - void commit(bool only_commit_microops, bool discard, BranchData &branch); + void commit(ThreadID thread_id, bool only_commit_microops, bool discard, + BranchData &branch); /** Set the drain state (with useful debugging messages) */ - void setDrainState(DrainState state); + void setDrainState(ThreadID thread_id, DrainState state); + + /** Use the current threading policy to determine the next thread to + * decode from. */ + ThreadID getCommittingThread(); + ThreadID getIssuingThread(); public: Execute(const std::string &name_, @@ -282,12 +324,6 @@ class Execute : public Named public: - /** Cause Execute to issue an UnpredictedBranch (or WakeupFetch if - * that was passed as the reason) to Fetch1 to wake the - * system up (using the PC from the thread context). */ - void wakeupFetch(BranchData::Reason reason = - BranchData::UnpredictedBranch); - /** Returns the DcachePort owned by this Execute to pass upwards */ MinorCPU::MinorCPUPort &getDcachePort(); diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc index d19d7b042..f4f120534 100644 --- a/src/cpu/minor/fetch1.cc +++ b/src/cpu/minor/fetch1.cc @@ -57,7 +57,7 @@ Fetch1::Fetch1(const std::string &name_, Latch<BranchData>::Output inp_, Latch<ForwardLineData>::Input out_, Latch<BranchData>::Output prediction_, - Reservable &next_stage_input_buffer) : + std::vector<InputBuffer<ForwardLineData>> &next_stage_input_buffer) : Named(name_), cpu(cpu_), inp(inp_), @@ -68,11 +68,8 @@ Fetch1::Fetch1(const std::string &name_, lineSnap(params.fetch1LineSnapWidth), maxLineWidth(params.fetch1LineWidth), fetchLimit(params.fetch1FetchLimit), - state(FetchWaitingForPC), - pc(0), - streamSeqNum(InstId::firstStreamSeqNum), - predictionSeqNum(InstId::firstPredictionSeqNum), - blocked(false), + fetchInfo(params.numThreads), + threadPriority(0), requests(name_ + ".requests", "lines", params.fetch1FetchLimit), transfers(name_ + ".transfers", "lines", params.fetch1FetchLimit), icacheState(IcacheRunning), @@ -114,32 +111,67 @@ Fetch1::Fetch1(const std::string &name_, } } +inline ThreadID +Fetch1::getScheduledThread() +{ + /* Select thread via policy. */ + std::vector<ThreadID> priority_list; + + switch (cpu.threadPolicy) { + case Enums::SingleThreaded: + priority_list.push_back(0); + break; + case Enums::RoundRobin: + priority_list = cpu.roundRobinPriority(threadPriority); + break; + case Enums::Random: + priority_list = cpu.randomPriority(); + break; + default: + panic("Unknown fetch policy"); + } + + for (auto tid : priority_list) { + if (cpu.getContext(tid)->status() == ThreadContext::Active && + !fetchInfo[tid].blocked && + fetchInfo[tid].state == FetchRunning) { + threadPriority = tid; + return tid; + } + } + + return InvalidThreadID; +} + void -Fetch1::fetchLine() +Fetch1::fetchLine(ThreadID tid) { + /* Reference the currently used thread state. */ + Fetch1ThreadInfo &thread = fetchInfo[tid]; + /* If line_offset != 0, a request is pushed for the remainder of the * line. */ /* Use a lower, sizeof(MachInst) aligned address for the fetch */ - Addr aligned_pc = pc.instAddr() & ~((Addr) lineSnap - 1); + Addr aligned_pc = thread.pc.instAddr() & ~((Addr) lineSnap - 1); unsigned int line_offset = aligned_pc % lineSnap; unsigned int request_size = maxLineWidth - line_offset; /* Fill in the line's id */ - InstId request_id(0 /* thread */, - streamSeqNum, predictionSeqNum, + InstId request_id(tid, + thread.streamSeqNum, thread.predictionSeqNum, lineSeqNum); - FetchRequestPtr request = new FetchRequest(*this, request_id, pc); + FetchRequestPtr request = new FetchRequest(*this, request_id, thread.pc); DPRINTF(Fetch, "Inserting fetch into the fetch queue " "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n", - request_id, aligned_pc, pc, line_offset, request_size); + request_id, aligned_pc, thread.pc, line_offset, request_size); - request->request.setContext(cpu.threads[0]->getTC()->contextId()); + request->request.setContext(cpu.threads[tid]->getTC()->contextId()); request->request.setVirt(0 /* asid */, aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(), /* I've no idea why we need the PC, but give it */ - pc.instAddr()); + thread.pc.instAddr()); DPRINTF(Fetch, "Submitting ITLB request\n"); numFetchesInITLB++; @@ -165,12 +197,12 @@ Fetch1::fetchLine() * reliable 'new' PC if the next line has a new stream sequence number. */ #if THE_ISA == ALPHA_ISA /* Restore the low bits of the PC used as address space flags */ - Addr pc_low_bits = pc.instAddr() & + Addr pc_low_bits = thread.pc.instAddr() & ((Addr) (1 << sizeof(TheISA::MachInst)) - 1); - pc.set(aligned_pc + request_size + pc_low_bits); + thread.pc.set(aligned_pc + request_size + pc_low_bits); #else - pc.set(aligned_pc + request_size); + thread.pc.set(aligned_pc + request_size); #endif } @@ -454,46 +486,58 @@ operator <<(std::ostream &os, Fetch1::FetchState state) void Fetch1::changeStream(const BranchData &branch) { + Fetch1ThreadInfo &thread = fetchInfo[branch.threadId]; + updateExpectedSeqNums(branch); /* Start fetching again if we were stopped */ switch (branch.reason) { case BranchData::SuspendThread: - DPRINTF(Fetch, "Suspending fetch: %s\n", branch); - state = FetchWaitingForPC; + { + if (thread.wakeupGuard) { + DPRINTF(Fetch, "Not suspending fetch due to guard: %s\n", + branch); + } else { + DPRINTF(Fetch, "Suspending fetch: %s\n", branch); + thread.state = FetchWaitingForPC; + } + } break; case BranchData::HaltFetch: DPRINTF(Fetch, "Halting fetch\n"); - state = FetchHalted; + thread.state = FetchHalted; break; default: DPRINTF(Fetch, "Changing stream on branch: %s\n", branch); - state = FetchRunning; + thread.state = FetchRunning; break; } - pc = branch.target; + thread.pc = branch.target; } void Fetch1::updateExpectedSeqNums(const BranchData &branch) { + Fetch1ThreadInfo &thread = fetchInfo[branch.threadId]; + DPRINTF(Fetch, "Updating streamSeqNum from: %d to %d," " predictionSeqNum from: %d to %d\n", - streamSeqNum, branch.newStreamSeqNum, - predictionSeqNum, branch.newPredictionSeqNum); + thread.streamSeqNum, branch.newStreamSeqNum, + thread.predictionSeqNum, branch.newPredictionSeqNum); /* Change the stream */ - streamSeqNum = branch.newStreamSeqNum; + thread.streamSeqNum = branch.newStreamSeqNum; /* Update the prediction. Note that it's possible for this to * actually set the prediction to an *older* value if new * predictions have been discarded by execute */ - predictionSeqNum = branch.newPredictionSeqNum; + thread.predictionSeqNum = branch.newPredictionSeqNum; } void Fetch1::processResponse(Fetch1::FetchRequestPtr response, ForwardLineData &line) { + Fetch1ThreadInfo &thread = fetchInfo[response->id.threadId]; PacketPtr packet = response->packet; /* Pass the prefetch abort (if any) on to Fetch2 in a ForwardLineData @@ -514,7 +558,7 @@ Fetch1::processResponse(Fetch1::FetchRequestPtr response, * can't (currently) selectively remove this stream from the queues */ DPRINTF(Fetch, "Stopping line fetch because of fault: %s\n", response->fault->name()); - state = Fetch1::FetchWaitingForPC; + thread.state = Fetch1::FetchWaitingForPC; } else { line.adoptPacketData(packet); /* Null the response's packet to prevent the response from trying to @@ -532,61 +576,86 @@ Fetch1::evaluate() assert(line_out.isBubble()); - blocked = !nextStageReserve.canReserve(); + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) + fetchInfo[tid].blocked = !nextStageReserve[tid].canReserve(); - /* Are we changing stream? Look to the Execute branches first, then - * to predicted changes of stream from Fetch2 */ - /* @todo, find better way to express ignoring branch predictions */ - if (execute_branch.isStreamChange() && - execute_branch.reason != BranchData::BranchPrediction) - { - if (state == FetchHalted) { - if (execute_branch.reason == BranchData::WakeupFetch) { - DPRINTF(Fetch, "Waking up fetch: %s\n", execute_branch); + /** Are both branches from later stages valid and for the same thread? */ + if (execute_branch.threadId != InvalidThreadID && + execute_branch.threadId == fetch2_branch.threadId) { + + Fetch1ThreadInfo &thread = fetchInfo[execute_branch.threadId]; + + /* Are we changing stream? Look to the Execute branches first, then + * to predicted changes of stream from Fetch2 */ + if (execute_branch.isStreamChange()) { + if (thread.state == FetchHalted) { + DPRINTF(Fetch, "Halted, ignoring branch: %s\n", execute_branch); + } else { changeStream(execute_branch); + } + + if (!fetch2_branch.isBubble()) { + DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n", + fetch2_branch); + } + + /* The streamSeqNum tagging in request/response ->req should handle + * discarding those requests when we get to them. */ + } else if (thread.state != FetchHalted && fetch2_branch.isStreamChange()) { + /* Handle branch predictions by changing the instruction source + * if we're still processing the same stream (as set by streamSeqNum) + * as the one of the prediction. + */ + if (fetch2_branch.newStreamSeqNum != thread.streamSeqNum) { + DPRINTF(Fetch, "Not changing stream on prediction: %s," + " streamSeqNum mismatch\n", + fetch2_branch); } else { - DPRINTF(Fetch, "Halted, ignoring branch: %s\n", - execute_branch); + changeStream(fetch2_branch); } - } else { - changeStream(execute_branch); } + } else { + /* Fetch2 and Execute branches are for different threads */ + if (execute_branch.threadId != InvalidThreadID && + execute_branch.isStreamChange()) { - if (!fetch2_branch.isBubble()) { - DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n", - fetch2_branch); + if (fetchInfo[execute_branch.threadId].state == FetchHalted) { + DPRINTF(Fetch, "Halted, ignoring branch: %s\n", execute_branch); + } else { + changeStream(execute_branch); + } } - /* The streamSeqNum tagging in request/response ->req should handle - * discarding those requests when we get to them. */ - } else if (state != FetchHalted && fetch2_branch.isStreamChange()) { - /* Handle branch predictions by changing the instruction source - * if we're still processing the same stream (as set by streamSeqNum) - * as the one of the prediction. - */ - if (fetch2_branch.newStreamSeqNum != streamSeqNum) { - DPRINTF(Fetch, "Not changing stream on prediction: %s," - " streamSeqNum mismatch\n", - fetch2_branch); - } else { - changeStream(fetch2_branch); + if (fetch2_branch.threadId != InvalidThreadID && + fetch2_branch.isStreamChange()) { + + if (fetchInfo[fetch2_branch.threadId].state == FetchHalted) { + DPRINTF(Fetch, "Halted, ignoring branch: %s\n", fetch2_branch); + } else if (fetch2_branch.newStreamSeqNum != fetchInfo[fetch2_branch.threadId].streamSeqNum) { + DPRINTF(Fetch, "Not changing stream on prediction: %s," + " streamSeqNum mismatch\n", fetch2_branch); + } else { + changeStream(fetch2_branch); + } } } - /* Can we fetch? */ - /* The bare minimum requirements for initiating a fetch */ - /* THREAD need to handle multiple threads */ - if (state == FetchRunning && /* We are actually fetching */ - !blocked && /* Space in the Fetch2 inputBuffer */ - /* The thread we're going to fetch for (thread 0), is active */ - cpu.getContext(0)->status() == ThreadContext::Active && - numInFlightFetches() < fetchLimit) - { - fetchLine(); - /* Take up a slot in the fetch queue */ - nextStageReserve.reserve(); + if (numInFlightFetches() < fetchLimit) { + ThreadID fetch_tid = getScheduledThread(); + + if (fetch_tid != InvalidThreadID) { + DPRINTF(Fetch, "Fetching from thread %d\n", fetch_tid); + + /* Generate fetch to selected thread */ + fetchLine(fetch_tid); + /* Take up a slot in the fetch queue */ + nextStageReserve[fetch_tid].reserve(); + } else { + DPRINTF(Fetch, "No active threads available to fetch from\n"); + } } + /* Halting shouldn't prevent fetches in flight from being processed */ /* Step fetches through the icachePort queues and memory system */ stepQueues(); @@ -599,9 +668,9 @@ Fetch1::evaluate() Fetch1::FetchRequestPtr response = transfers.front(); if (response->isDiscardable()) { - nextStageReserve.freeReservation(); + nextStageReserve[response->id.threadId].freeReservation(); - DPRINTF(Fetch, "Discarding translated fetch at it's for" + DPRINTF(Fetch, "Discarding translated fetch as it's for" " an old stream\n"); /* Wake up next cycle just in case there was some other @@ -626,19 +695,49 @@ Fetch1::evaluate() * generate a line output (tested just above) or to initiate a memory * fetch which will signal activity when it returns/needs stepping * between queues */ + + + /* This looks hackish. And it is, but there doesn't seem to be a better + * way to do this. The signal from commit to suspend fetch takes 1 + * clock cycle to propagate to fetch. However, a legitimate wakeup + * may occur between cycles from the memory system. Thus wakeup guard + * prevents us from suspending in that case. */ + + for (auto& thread : fetchInfo) { + thread.wakeupGuard = false; + } +} + +void +Fetch1::wakeupFetch(ThreadID tid) +{ + ThreadContext *thread_ctx = cpu.getContext(tid); + Fetch1ThreadInfo &thread = fetchInfo[tid]; + thread.pc = thread_ctx->pcState(); + thread.state = FetchRunning; + thread.wakeupGuard = true; + DPRINTF(Fetch, "[tid:%d]: Changing stream wakeup %s\n", + tid, thread_ctx->pcState()); + + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); } bool Fetch1::isDrained() { - DPRINTF(Drain, "isDrained %s %s%s\n", - state, - (numInFlightFetches() == 0 ? "" : "inFlightFetches "), - ((*out.inputWire).isBubble() ? "" : "outputtingLine")); - - return state == FetchHalted && - numInFlightFetches() == 0 && - (*out.inputWire).isBubble(); + bool drained = numInFlightFetches() == 0 && (*out.inputWire).isBubble(); + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + Fetch1ThreadInfo &thread = fetchInfo[tid]; + DPRINTF(Drain, "isDrained[tid:%d]: %s %s%s\n", + tid, + thread.state == FetchHalted, + (numInFlightFetches() == 0 ? "" : "inFlightFetches "), + ((*out.inputWire).isBubble() ? "" : "outputtingLine")); + + drained = drained && thread.state == FetchHalted; + } + + return drained; } void @@ -649,26 +748,32 @@ Fetch1::FetchRequest::reportData(std::ostream &os) const bool Fetch1::FetchRequest::isDiscardable() const { + Fetch1ThreadInfo &thread = fetch.fetchInfo[id.threadId]; + /* Can't discard lines in TLB/memory */ return state != InTranslation && state != RequestIssuing && - (id.streamSeqNum != fetch.streamSeqNum || - id.predictionSeqNum != fetch.predictionSeqNum); + (id.streamSeqNum != thread.streamSeqNum || + id.predictionSeqNum != thread.predictionSeqNum); } void Fetch1::minorTrace() const { + // TODO: Un-bork minorTrace for THREADS + // bork bork bork + const Fetch1ThreadInfo &thread = fetchInfo[0]; + std::ostringstream data; - if (blocked) + if (thread.blocked) data << 'B'; else (*out.inputWire).reportData(data); MINORTRACE("state=%s icacheState=%s in_tlb_mem=%s/%s" - " streamSeqNum=%d lines=%s\n", state, icacheState, + " streamSeqNum=%d lines=%s\n", thread.state, icacheState, numFetchesInITLB, numFetchesInMemorySystem, - streamSeqNum, data.str()); + thread.streamSeqNum, data.str()); requests.minorTrace(); transfers.minorTrace(); } diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh index d4a35c468..cf6c9d254 100644 --- a/src/cpu/minor/fetch1.hh +++ b/src/cpu/minor/fetch1.hh @@ -197,7 +197,7 @@ class Fetch1 : public Named Latch<BranchData>::Output prediction; /** Interface to reserve space in the next stage */ - Reservable &nextStageReserve; + std::vector<InputBuffer<ForwardLineData>> &nextStageReserve; /** IcachePort to pass to the CPU. Fetch1 is the only module that uses * it. */ @@ -233,26 +233,53 @@ class Fetch1 : public Named /** Stage cycle-by-cycle state */ - FetchState state; + struct Fetch1ThreadInfo { - /** Fetch PC value. This is updated by branches from Execute, branch - * prediction targets from Fetch2 and by incrementing it as we fetch - * lines subsequent to those two sources. */ - TheISA::PCState pc; + /** Consturctor to initialize all fields. */ + Fetch1ThreadInfo() : + state(FetchWaitingForPC), + pc(TheISA::PCState(0)), + streamSeqNum(InstId::firstStreamSeqNum), + predictionSeqNum(InstId::firstPredictionSeqNum), + blocked(false), + wakeupGuard(false) + { } + + Fetch1ThreadInfo(const Fetch1ThreadInfo& other) : + state(other.state), + pc(other.pc), + streamSeqNum(other.streamSeqNum), + predictionSeqNum(other.predictionSeqNum), + blocked(other.blocked) + { } + + FetchState state; + + /** Fetch PC value. This is updated by branches from Execute, branch + * prediction targets from Fetch2 and by incrementing it as we fetch + * lines subsequent to those two sources. */ + TheISA::PCState pc; - /** Stream sequence number. This changes on request from Execute and is - * used to tag instructions by the fetch stream to which they belong. - * Execute originates new prediction sequence numbers. */ - InstSeqNum streamSeqNum; + /** Stream sequence number. This changes on request from Execute and is + * used to tag instructions by the fetch stream to which they belong. + * Execute originates new prediction sequence numbers. */ + InstSeqNum streamSeqNum; - /** Prediction sequence number. This changes when requests from Execute - * or Fetch2 ask for a change of fetch address and is used to tag lines - * by the prediction to which they belong. Fetch2 originates - * prediction sequence numbers. */ - InstSeqNum predictionSeqNum; + /** Prediction sequence number. This changes when requests from Execute + * or Fetch2 ask for a change of fetch address and is used to tag lines + * by the prediction to which they belong. Fetch2 originates + * prediction sequence numbers. */ + InstSeqNum predictionSeqNum; - /** Blocked indication for report */ - bool blocked; + /** Blocked indication for report */ + bool blocked; + + /** Signal to guard against sleeping first cycle of wakeup */ + bool wakeupGuard; + }; + + std::vector<Fetch1ThreadInfo> fetchInfo; + ThreadID threadPriority; /** State of memory access for head instruction fetch */ enum IcacheState @@ -307,10 +334,15 @@ class Fetch1 : public Named friend std::ostream &operator <<(std::ostream &os, IcacheState state); + + /** Use the current threading policy to determine the next thread to + * fetch from. */ + ThreadID getScheduledThread(); + /** Insert a line fetch into the requests. This can be a partial * line request where the given address has a non-0 offset into a * line. */ - void fetchLine(); + void fetchLine(ThreadID tid); /** Try and issue a fetch for a translated request at the * head of the requests queue. Also tries to move the request @@ -354,7 +386,7 @@ class Fetch1 : public Named Latch<BranchData>::Output inp_, Latch<ForwardLineData>::Input out_, Latch<BranchData>::Output prediction_, - Reservable &next_stage_input_buffer); + std::vector<InputBuffer<ForwardLineData>> &next_stage_input_buffer); public: /** Returns the IcachePort owned by this Fetch1 */ @@ -363,6 +395,9 @@ class Fetch1 : public Named /** Pass on input/buffer data to the output if you can */ void evaluate(); + /** Initiate fetch1 fetching */ + void wakeupFetch(ThreadID tid); + void minorTrace() const; /** Is this stage drained? For Fetch1, draining is initiated by diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc index cb45f16e3..ae02b1c22 100644 --- a/src/cpu/minor/fetch2.cc +++ b/src/cpu/minor/fetch2.cc @@ -58,7 +58,7 @@ Fetch2::Fetch2(const std::string &name, Latch<BranchData>::Output branchInp_, Latch<BranchData>::Input predictionOut_, Latch<ForwardInstData>::Input out_, - Reservable &next_stage_input_buffer) : + std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) : Named(name), cpu(cpu_), inp(inp_), @@ -69,15 +69,8 @@ Fetch2::Fetch2(const std::string &name, outputWidth(params.decodeInputWidth), processMoreThanOneInput(params.fetch2CycleInput), branchPredictor(*params.branchPred), - inputBuffer(name + ".inputBuffer", "lines", params.fetch2InputBufferSize), - inputIndex(0), - pc(TheISA::PCState(0)), - havePC(false), - lastStreamSeqNum(InstId::firstStreamSeqNum), - fetchSeqNum(InstId::firstFetchSeqNum), - expectedStreamSeqNum(InstId::firstStreamSeqNum), - predictionSeqNum(InstId::firstPredictionSeqNum), - blocked(false) + fetchInfo(params.numThreads), + threadPriority(0) { if (outputWidth < 1) fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth); @@ -86,38 +79,46 @@ Fetch2::Fetch2(const std::string &name, fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name, params.fetch2InputBufferSize); } + + /* Per-thread input buffers */ + for (ThreadID tid = 0; tid < params.numThreads; tid++) { + inputBuffer.push_back( + InputBuffer<ForwardLineData>( + name + ".inputBuffer" + std::to_string(tid), "lines", + params.fetch2InputBufferSize)); + } } const ForwardLineData * -Fetch2::getInput() +Fetch2::getInput(ThreadID tid) { /* Get a line from the inputBuffer to work with */ - if (!inputBuffer.empty()) { - return &(inputBuffer.front()); + if (!inputBuffer[tid].empty()) { + return &(inputBuffer[tid].front()); } else { return NULL; } } void -Fetch2::popInput() +Fetch2::popInput(ThreadID tid) { - if (!inputBuffer.empty()) { - inputBuffer.front().freeLine(); - inputBuffer.pop(); + if (!inputBuffer[tid].empty()) { + inputBuffer[tid].front().freeLine(); + inputBuffer[tid].pop(); } - inputIndex = 0; + fetchInfo[tid].inputIndex = 0; } void -Fetch2::dumpAllInput() +Fetch2::dumpAllInput(ThreadID tid) { DPRINTF(Fetch, "Dumping whole input buffer\n"); - while (!inputBuffer.empty()) - popInput(); + while (!inputBuffer[tid].empty()) + popInput(tid); - inputIndex = 0; + fetchInfo[tid].inputIndex = 0; } void @@ -139,9 +140,6 @@ Fetch2::updateBranchPrediction(const BranchData &branch) case BranchData::SuspendThread: /* Don't need to act on suspends */ break; - case BranchData::WakeupFetch: - /* Don't need to act on wakeups, no instruction tied to action. */ - break; case BranchData::HaltFetch: /* Don't need to act on fetch wakeup */ break; @@ -180,6 +178,7 @@ Fetch2::updateBranchPrediction(const BranchData &branch) void Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) { + Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId]; TheISA::PCState inst_pc = inst->pc; assert(!inst->predictedTaken); @@ -209,35 +208,37 @@ Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) if (inst->predictedTaken) { /* Update the predictionSeqNum and remember the streamSeqNum that it * was associated with */ - expectedStreamSeqNum = inst->id.streamSeqNum; + thread.expectedStreamSeqNum = inst->id.streamSeqNum; BranchData new_branch = BranchData(BranchData::BranchPrediction, - inst->id.streamSeqNum, predictionSeqNum + 1, + inst->id.threadId, + inst->id.streamSeqNum, thread.predictionSeqNum + 1, inst->predictedTarget, inst); /* Mark with a new prediction number by the stream number of the * instruction causing the prediction */ - predictionSeqNum++; + thread.predictionSeqNum++; branch = new_branch; DPRINTF(Branch, "Branch predicted taken inst: %s target: %s" " new predictionSeqNum: %d\n", - *inst, inst->predictedTarget, predictionSeqNum); + *inst, inst->predictedTarget, thread.predictionSeqNum); } } void Fetch2::evaluate() { - inputBuffer.setTail(*inp.outputWire); + /* Push input onto appropriate input buffer */ + if (!inp.outputWire->isBubble()) + inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire); + ForwardInstData &insts_out = *out.inputWire; BranchData prediction; BranchData &branch_inp = *branchInp.outputWire; assert(insts_out.isBubble()); - blocked = false; - /* React to branches from Execute to update local branch prediction * structures */ updateBranchPrediction(branch_inp); @@ -247,39 +248,48 @@ Fetch2::evaluate() if (branch_inp.isStreamChange()) { DPRINTF(Fetch, "Dumping all input as a stream changing branch" " has arrived\n"); - dumpAllInput(); - havePC = false; + dumpAllInput(branch_inp.threadId); + fetchInfo[branch_inp.threadId].havePC = false; } + assert(insts_out.isBubble()); /* Even when blocked, clear out input lines with the wrong * prediction sequence number */ - { - const ForwardLineData *line_in = getInput(); + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + Fetch2ThreadInfo &thread = fetchInfo[tid]; + + thread.blocked = !nextStageReserve[tid].canReserve(); + + const ForwardLineData *line_in = getInput(tid); while (line_in && - expectedStreamSeqNum == line_in->id.streamSeqNum && - predictionSeqNum != line_in->id.predictionSeqNum) + thread.expectedStreamSeqNum == line_in->id.streamSeqNum && + thread.predictionSeqNum != line_in->id.predictionSeqNum) { DPRINTF(Fetch, "Discarding line %s" " due to predictionSeqNum mismatch (expected: %d)\n", - line_in->id, predictionSeqNum); + line_in->id, thread.predictionSeqNum); - popInput(); - havePC = false; + popInput(tid); + fetchInfo[tid].havePC = false; if (processMoreThanOneInput) { DPRINTF(Fetch, "Wrapping\n"); - line_in = getInput(); + line_in = getInput(tid); } else { line_in = NULL; } } } - if (!nextStageReserve.canReserve()) { - blocked = true; - } else { - const ForwardLineData *line_in = getInput(); + ThreadID tid = getScheduledThread(); + DPRINTF(Fetch, "Scheduled Thread: %d\n", tid); + + assert(insts_out.isBubble()); + if (tid != InvalidThreadID) { + Fetch2ThreadInfo &fetch_info = fetchInfo[tid]; + + const ForwardLineData *line_in = getInput(tid); unsigned int output_index = 0; @@ -288,7 +298,7 @@ Fetch2::evaluate() * for faulting lines */ while (line_in && (line_in->isFault() || - inputIndex < line_in->lineWidth) && /* More input */ + fetch_info.inputIndex < line_in->lineWidth) && /* More input */ output_index < outputWidth && /* More output to fill */ prediction.isBubble() /* No predicted branch */) { @@ -298,26 +308,26 @@ Fetch2::evaluate() /* Discard line due to prediction sequence number being wrong but * without the streamSeqNum number having changed */ bool discard_line = - expectedStreamSeqNum == line_in->id.streamSeqNum && - predictionSeqNum != line_in->id.predictionSeqNum; + fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum && + fetch_info.predictionSeqNum != line_in->id.predictionSeqNum; /* Set the PC if the stream changes. Setting havePC to false in * a previous cycle handles all other change of flow of control * issues */ - bool set_pc = lastStreamSeqNum != line_in->id.streamSeqNum; + bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum; - if (!discard_line && (!havePC || set_pc)) { + if (!discard_line && (!fetch_info.havePC || set_pc)) { /* Set the inputIndex to be the MachInst-aligned offset * from lineBaseAddr of the new PC value */ - inputIndex = + fetch_info.inputIndex = (line_in->pc.instAddr() & BaseCPU::PCMask) - line_in->lineBaseAddr; DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x" " lineBaseAddr: 0x%x lineWidth: 0x%x\n", - line_in->pc, inputIndex, line_in->lineBaseAddr, + line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, line_in->lineWidth); - pc = line_in->pc; - havePC = true; + fetch_info.pc = line_in->pc; + fetch_info.havePC = true; decoder->reset(); } @@ -330,7 +340,8 @@ Fetch2::evaluate() * stream */ DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)" " due to predictionSeqNum mismatch (expected: %d)\n", - line_in->id, inputIndex, predictionSeqNum); + line_in->id, fetch_info.inputIndex, + fetch_info.predictionSeqNum); } else if (line_in->isFault()) { /* Pack a fault as a MinorDynInst with ->fault set */ @@ -339,13 +350,13 @@ Fetch2::evaluate() dyn_inst = new MinorDynInst(line_in->id); /* Fetch and prediction sequence numbers originate here */ - dyn_inst->id.fetchSeqNum = fetchSeqNum; - dyn_inst->id.predictionSeqNum = predictionSeqNum; + dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; + dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; /* To complete the set, test that exec sequence number has * not been set */ assert(dyn_inst->id.execSeqNum == 0); - dyn_inst->pc = pc; + dyn_inst->pc = fetch_info.pc; /* Pack a faulting instruction but allow other * instructions to be generated. (Fetch2 makes no @@ -361,13 +372,14 @@ Fetch2::evaluate() * assign */ inst_word = TheISA::gtoh( *(reinterpret_cast<TheISA::MachInst *> - (line + inputIndex))); + (line + fetch_info.inputIndex))); if (!decoder->instReady()) { - decoder->moreBytes(pc, - line_in->lineBaseAddr + inputIndex, inst_word); - DPRINTF(Fetch, "Offering MachInst to decoder" - " addr: 0x%x\n", line_in->lineBaseAddr + inputIndex); + decoder->moreBytes(fetch_info.pc, + line_in->lineBaseAddr + fetch_info.inputIndex, + inst_word); + DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n", + line_in->lineBaseAddr + fetch_info.inputIndex); } /* Maybe make the above a loop to accomodate ISAs with @@ -379,8 +391,8 @@ Fetch2::evaluate() dyn_inst = new MinorDynInst(line_in->id); /* Fetch and prediction sequence numbers originate here */ - dyn_inst->id.fetchSeqNum = fetchSeqNum; - dyn_inst->id.predictionSeqNum = predictionSeqNum; + dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; + dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; /* To complete the set, test that exec sequence number * has not been set */ assert(dyn_inst->id.execSeqNum == 0); @@ -388,17 +400,19 @@ Fetch2::evaluate() /* Note that the decoder can update the given PC. * Remember not to assign it until *after* calling * decode */ - StaticInstPtr decoded_inst = decoder->decode(pc); + StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc); dyn_inst->staticInst = decoded_inst; - dyn_inst->pc = pc; + dyn_inst->pc = fetch_info.pc; + DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst); + DPRINTF(Fetch, "Instruction extracted from line %s" " lineWidth: %d output_index: %d inputIndex: %d" " pc: %s inst: %s\n", line_in->id, - line_in->lineWidth, output_index, inputIndex, - pc, *dyn_inst); + line_in->lineWidth, output_index, fetch_info.inputIndex, + fetch_info.pc, *dyn_inst); #if THE_ISA == X86_ISA || THE_ISA == ARM_ISA /* In SE mode, it's possible to branch to a microop when @@ -415,12 +429,12 @@ Fetch2::evaluate() * the case that, after a branch, the first un-advanced PC * may be pointing to a microop other than 0. Once * advanced, however, the microop number *must* be 0 */ - pc.upc(0); - pc.nupc(1); + fetch_info.pc.upc(0); + fetch_info.pc.nupc(1); #endif /* Advance PC for the next instruction */ - TheISA::advancePC(pc, decoded_inst); + TheISA::advancePC(fetch_info.pc, decoded_inst); /* Predict any branches and issue a branch if * necessary */ @@ -432,22 +446,23 @@ Fetch2::evaluate() /* Step on the pointer into the line if there's no * complete instruction waiting */ if (decoder->needMoreBytes()) { - inputIndex += sizeof(TheISA::MachInst); + fetch_info.inputIndex += sizeof(TheISA::MachInst); DPRINTF(Fetch, "Updated inputIndex value PC: %s" " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n", - line_in->pc, inputIndex, line_in->lineBaseAddr, + line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, line_in->lineWidth); } } if (dyn_inst) { /* Step to next sequence number */ - fetchSeqNum++; + fetch_info.fetchSeqNum++; /* Correctly size the output before writing */ - if (output_index == 0) + if (output_index == 0) { insts_out.resize(outputWidth); + } /* Pack the generated dynamic instruction into the output */ insts_out.insts[output_index] = dyn_inst; output_index++; @@ -463,7 +478,7 @@ Fetch2::evaluate() /* Remember the streamSeqNum of this line so we can tell when * we change stream */ - lastStreamSeqNum = line_in->id.streamSeqNum; + fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum; /* Asked to discard line or there was a branch or fault */ if (!prediction.isBubble() || /* The remains of a @@ -471,33 +486,35 @@ Fetch2::evaluate() line_in->isFault() /* A line which is just a fault */) { DPRINTF(Fetch, "Discarding all input on branch/fault\n"); - dumpAllInput(); - havePC = false; + dumpAllInput(tid); + fetch_info.havePC = false; line_in = NULL; } else if (discard_line) { /* Just discard one line, one's behind it may have new * stream sequence numbers. There's a DPRINTF above * for this event */ - popInput(); - havePC = false; + popInput(tid); + fetch_info.havePC = false; line_in = NULL; - } else if (inputIndex == line_in->lineWidth) { + } else if (fetch_info.inputIndex == line_in->lineWidth) { /* Got to end of a line, pop the line but keep PC * in case this is a line-wrapping inst. */ - popInput(); + popInput(tid); line_in = NULL; } if (!line_in && processMoreThanOneInput) { DPRINTF(Fetch, "Wrapping\n"); - line_in = getInput(); + line_in = getInput(tid); } } /* The rest of the output (if any) should already have been packed * with bubble instructions by insts_out's initialisation */ } - + if (tid == InvalidThreadID) { + assert(insts_out.isBubble()); + } /** Reserve a slot in the next stage and output data */ *predictionOut.inputWire = prediction; @@ -506,24 +523,66 @@ Fetch2::evaluate() if (!insts_out.isBubble()) { /* Note activity of following buffer */ cpu.activityRecorder->activity(); - nextStageReserve.reserve(); + insts_out.threadId = tid; + nextStageReserve[tid].reserve(); } /* If we still have input to process and somewhere to put it, * mark stage as active */ - if (getInput() && nextStageReserve.canReserve()) - cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); + for (ThreadID i = 0; i < cpu.numThreads; i++) + { + if (getInput(i) && nextStageReserve[i].canReserve()) { + cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); + break; + } + } /* Make sure the input (if any left) is pushed */ - inputBuffer.pushTail(); + if (!inp.outputWire->isBubble()) + inputBuffer[inp.outputWire->id.threadId].pushTail(); +} + +inline ThreadID +Fetch2::getScheduledThread() +{ + /* Select thread via policy. */ + std::vector<ThreadID> priority_list; + + switch (cpu.threadPolicy) { + case Enums::SingleThreaded: + priority_list.push_back(0); + break; + case Enums::RoundRobin: + priority_list = cpu.roundRobinPriority(threadPriority); + break; + case Enums::Random: + priority_list = cpu.randomPriority(); + break; + default: + panic("Unknown fetch policy"); + } + + for (auto tid : priority_list) { + if (cpu.getContext(tid)->status() == ThreadContext::Active && + getInput(tid) && !fetchInfo[tid].blocked) { + threadPriority = tid; + return tid; + } + } + + return InvalidThreadID; } bool Fetch2::isDrained() { - return inputBuffer.empty() && - (*inp.outputWire).isBubble() && - (*predictionOut.inputWire).isBubble(); + for (const auto &buffer : inputBuffer) { + if (!buffer.empty()) + return false; + } + + return (*inp.outputWire).isBubble() && + (*predictionOut.inputWire).isBubble(); } void @@ -531,14 +590,14 @@ Fetch2::minorTrace() const { std::ostringstream data; - if (blocked) + if (fetchInfo[0].blocked) data << 'B'; else (*out.inputWire).reportData(data); MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n", - inputIndex, havePC, predictionSeqNum, data.str()); - inputBuffer.minorTrace(); + fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str()); + inputBuffer[0].minorTrace(); } } diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh index 2fc38b377..33c683b82 100644 --- a/src/cpu/minor/fetch2.hh +++ b/src/cpu/minor/fetch2.hh @@ -78,7 +78,7 @@ class Fetch2 : public Named Latch<ForwardInstData>::Input out; /** Interface to reserve space in the next stage */ - Reservable &nextStageReserve; + std::vector<InputBuffer<ForwardInstData>> &nextStageReserve; /** Width of output of this stage/input of next in instructions */ unsigned int outputWidth; @@ -92,61 +92,90 @@ class Fetch2 : public Named public: /* Public so that Pipeline can pass it to Fetch1 */ - InputBuffer<ForwardLineData> inputBuffer; + std::vector<InputBuffer<ForwardLineData>> inputBuffer; protected: /** Data members after this line are cycle-to-cycle state */ - /** Index into an incompletely processed input line that instructions - * are to be extracted from */ - unsigned int inputIndex; - - /** Remembered program counter value. Between contiguous lines, this - * is just updated with advancePC. For lines following changes of - * stream, a new PC must be loaded and havePC be set. - * havePC is needed to accomodate instructions which span across - * lines meaning that Fetch2 and the decoder need to remember a PC - * value and a partially-offered instruction from the previous line */ - TheISA::PCState pc; - - /** PC is currently valid. Initially false, gets set to true when a - * change-of-stream line is received and false again when lines are - * discarded for any reason */ - bool havePC; - - /** Stream sequence number of the last seen line used to identify changes - * of instruction stream */ - InstSeqNum lastStreamSeqNum; - - /** Fetch2 is the source of fetch sequence numbers. These represent the - * sequence that instructions were extracted from fetched lines. */ - InstSeqNum fetchSeqNum; - - /** Stream sequence number remembered from last time the predictionSeqNum - * changed. Lines should only be discarded when their predictionSeqNums - * disagree with Fetch2::predictionSeqNum *and* they are from the same - * stream that bore that prediction number */ - InstSeqNum expectedStreamSeqNum; - - /** Fetch2 is the source of prediction sequence numbers. These represent - * predicted changes of control flow sources from branch prediction in - * Fetch2. */ - InstSeqNum predictionSeqNum; - - /** Blocked indication for report */ - bool blocked; + struct Fetch2ThreadInfo { + + /** Default constructor */ + Fetch2ThreadInfo() : + inputIndex(0), + pc(TheISA::PCState(0)), + havePC(false), + lastStreamSeqNum(InstId::firstStreamSeqNum), + fetchSeqNum(InstId::firstFetchSeqNum), + expectedStreamSeqNum(InstId::firstStreamSeqNum), + predictionSeqNum(InstId::firstPredictionSeqNum), + blocked(false) + { } + + Fetch2ThreadInfo(const Fetch2ThreadInfo& other) : + inputIndex(other.inputIndex), + pc(other.pc), + havePC(other.havePC), + lastStreamSeqNum(other.lastStreamSeqNum), + expectedStreamSeqNum(other.expectedStreamSeqNum), + predictionSeqNum(other.predictionSeqNum), + blocked(other.blocked) + { } + + /** Index into an incompletely processed input line that instructions + * are to be extracted from */ + unsigned int inputIndex; + + + /** Remembered program counter value. Between contiguous lines, this + * is just updated with advancePC. For lines following changes of + * stream, a new PC must be loaded and havePC be set. + * havePC is needed to accomodate instructions which span across + * lines meaning that Fetch2 and the decoder need to remember a PC + * value and a partially-offered instruction from the previous line */ + TheISA::PCState pc; + + /** PC is currently valid. Initially false, gets set to true when a + * change-of-stream line is received and false again when lines are + * discarded for any reason */ + bool havePC; + + /** Stream sequence number of the last seen line used to identify + * changes of instruction stream */ + InstSeqNum lastStreamSeqNum; + + /** Fetch2 is the source of fetch sequence numbers. These represent the + * sequence that instructions were extracted from fetched lines. */ + InstSeqNum fetchSeqNum; + + /** Stream sequence number remembered from last time the + * predictionSeqNum changed. Lines should only be discarded when their + * predictionSeqNums disagree with Fetch2::predictionSeqNum *and* they + * are from the same stream that bore that prediction number */ + InstSeqNum expectedStreamSeqNum; + + /** Fetch2 is the source of prediction sequence numbers. These + * represent predicted changes of control flow sources from branch + * prediction in Fetch2. */ + InstSeqNum predictionSeqNum; + + /** Blocked indication for report */ + bool blocked; + }; + + std::vector<Fetch2ThreadInfo> fetchInfo; + ThreadID threadPriority; protected: /** Get a piece of data to work on from the inputBuffer, or 0 if there * is no data. */ - const ForwardLineData *getInput(); + const ForwardLineData *getInput(ThreadID tid); /** Pop an element off the input buffer, if there are any */ - void popInput(); + void popInput(ThreadID tid); /** Dump the whole contents of the input buffer. Useful after a * prediction changes control flow */ - void dumpAllInput(); + void dumpAllInput(ThreadID tid); /** Update local branch prediction structures from feedback from * Execute. */ @@ -157,6 +186,10 @@ class Fetch2 : public Named * carries the prediction to Fetch1 */ void predictBranch(MinorDynInstPtr inst, BranchData &branch); + /** Use the current threading policy to determine the next thread to + * fetch from. */ + ThreadID getScheduledThread(); + public: Fetch2(const std::string &name, MinorCPU &cpu_, @@ -165,7 +198,7 @@ class Fetch2 : public Named Latch<BranchData>::Output branchInp_, Latch<BranchData>::Input predictionOut_, Latch<ForwardInstData>::Input out_, - Reservable &next_stage_input_buffer); + std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer); public: /** Pass on input/buffer data to the output if you can */ diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc index b5c0bc974..5995a52c2 100644 --- a/src/cpu/minor/lsq.cc +++ b/src/cpu/minor/lsq.cc @@ -216,13 +216,14 @@ operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) void LSQ::clearMemBarrier(MinorDynInstPtr inst) { - bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier; + bool is_last_barrier = + inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId]; DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", (is_last_barrier ? "last" : "a"), *inst); if (is_last_barrier) - lastMemBarrier = 0; + lastMemBarrier[inst->id.threadId] = 0; } void @@ -676,7 +677,8 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, while (ret == NoAddrRangeCoverage && i != slots.rend()) { LSQRequestPtr slot = *i; - if (slot->packet) { + if (slot->packet && + slot->inst->id.threadId == request->inst->id.threadId) { AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); if (coverage != NoAddrRangeCoverage) { @@ -1042,8 +1044,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request) request->issuedToMemory = true; } - if (tryToSend(request)) + if (tryToSend(request)) { moveFromRequestsToTransfers(request); + } } else { request->setState(LSQRequest::Complete); moveFromRequestsToTransfers(request); @@ -1145,6 +1148,9 @@ LSQ::tryToSend(LSQRequestPtr request) } } + if (ret) + threadSnoop(request); + return ret; } @@ -1293,7 +1299,7 @@ LSQ::LSQ(std::string name_, std::string dcache_port_name_, cpu(cpu_), execute(execute_), dcachePort(dcache_port_name_, *this, cpu_), - lastMemBarrier(0), + lastMemBarrier(cpu.numThreads, 0), state(MemoryRunning), inMemorySystemLimit(in_memory_system_limit), lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)), @@ -1526,7 +1532,7 @@ LSQ::minorTrace() const MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d" " lastMemBarrier=%d\n", state, numAccessesInDTLB, numAccessesInMemorySystem, - numStoresInTransfers, lastMemBarrier); + numStoresInTransfers, lastMemBarrier[0]); requests.minorTrace(); transfers.minorTrace(); storeBuffer.minorTrace(); @@ -1565,12 +1571,12 @@ void LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) { assert(inst->isInst() && inst->staticInst->isMemBarrier()); - assert(inst->id.execSeqNum > lastMemBarrier); + assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]); /* Remember the barrier. We only have a notion of one * barrier so this may result in some mem refs being * delayed if they are between barriers */ - lastMemBarrier = inst->id.execSeqNum; + lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum; } void @@ -1616,10 +1622,40 @@ LSQ::recvTimingSnoopReq(PacketPtr pkt) /* LLSC operations in Minor can't be speculative and are executed from * the head of the requests queue. We shouldn't need to do more than * this action on snoops. */ + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { + cpu.wakeup(tid); + } + } - /* THREAD */ if (pkt->isInvalidate() || pkt->isWrite()) { - TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask); + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, + cacheBlockMask); + } + } +} + +void +LSQ::threadSnoop(LSQRequestPtr request) +{ + /* LLSC operations in Minor can't be speculative and are executed from + * the head of the requests queue. We shouldn't need to do more than + * this action on snoops. */ + ThreadID req_tid = request->inst->id.threadId; + PacketPtr pkt = request->packet; + + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + if (tid != req_tid) { + if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { + cpu.wakeup(tid); + } + + if (pkt->isInvalidate() || pkt->isWrite()) { + TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, + cacheBlockMask); + } + } } } diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh index 09fb30d03..e0b72177c 100644 --- a/src/cpu/minor/lsq.hh +++ b/src/cpu/minor/lsq.hh @@ -537,7 +537,7 @@ class LSQ : public Named /** Most recent execSeqNum of a memory barrier instruction or * 0 if there are no in-flight barriers. Useful as a * dependency for early-issued memory operations */ - InstSeqNum lastMemBarrier; + std::vector<InstSeqNum> lastMemBarrier; public: /** Retry state of last issued memory transfer */ @@ -640,6 +640,9 @@ class LSQ : public Named /** Can a request be sent to the memory system */ bool canSendToMemorySystem(); + /** Snoop other threads monitors on memory system accesses */ + void threadSnoop(LSQRequestPtr request); + public: LSQ(std::string name_, std::string dcache_port_name_, MinorCPU &cpu_, Execute &execute_, @@ -691,7 +694,8 @@ class LSQ : public Named void issuedMemBarrierInst(MinorDynInstPtr inst); /** Get the execSeqNum of the last issued memory barrier */ - InstSeqNum getLastMemBarrier() const { return lastMemBarrier; } + InstSeqNum getLastMemBarrier(ThreadID thread_id) const + { return lastMemBarrier[thread_id]; } /** Is there nothing left in the LSQ */ bool isDrained(); diff --git a/src/cpu/minor/pipe_data.cc b/src/cpu/minor/pipe_data.cc index 447f9c0e7..208c9c9f4 100644 --- a/src/cpu/minor/pipe_data.cc +++ b/src/cpu/minor/pipe_data.cc @@ -71,9 +71,6 @@ operator <<(std::ostream &os, BranchData::Reason reason) case BranchData::SuspendThread: os << "SuspendThread"; break; - case BranchData::WakeupFetch: - os << "WakeupFetch"; - break; case BranchData::HaltFetch: os << "HaltFetch"; break; @@ -102,7 +99,6 @@ BranchData::isStreamChange(const BranchData::Reason reason) case BadlyPredictedBranch: case SuspendThread: case Interrupt: - case WakeupFetch: case HaltFetch: ret = true; break; @@ -123,7 +119,6 @@ BranchData::isBranch(const BranchData::Reason reason) case CorrectlyPredictedBranch: case SuspendThread: case Interrupt: - case WakeupFetch: case HaltFetch: ret = false; break; @@ -228,8 +223,8 @@ ForwardLineData::reportData(std::ostream &os) const os << id; } -ForwardInstData::ForwardInstData(unsigned int width) : - numInsts(width) +ForwardInstData::ForwardInstData(unsigned int width, ThreadID tid) : + numInsts(width), threadId(tid) { bubbleFill(); } diff --git a/src/cpu/minor/pipe_data.hh b/src/cpu/minor/pipe_data.hh index 4468cb89e..e514be2f9 100644 --- a/src/cpu/minor/pipe_data.hh +++ b/src/cpu/minor/pipe_data.hh @@ -91,8 +91,6 @@ class BranchData /* : public ReportIF, public BubbleIF */ * count it as stream changing itself and expect pc to be the PC * of the next instruction */ SuspendThread, - /* Wakeup fetching from Halted */ - WakeupFetch, /* Branch from an interrupt (no instruction) */ Interrupt, /* Stop fetching in anticipation of of draining */ @@ -112,6 +110,9 @@ class BranchData /* : public ReportIF, public BubbleIF */ /** Explanation for this branch */ Reason reason; + /** ThreadID associated with branch */ + ThreadID threadId; + /** Sequence number of new stream/prediction to be adopted */ InstSeqNum newStreamSeqNum; InstSeqNum newPredictionSeqNum; @@ -124,18 +125,20 @@ class BranchData /* : public ReportIF, public BubbleIF */ public: BranchData() : - reason(NoBranch), newStreamSeqNum(0), + reason(NoBranch), threadId(InvalidThreadID), newStreamSeqNum(0), newPredictionSeqNum(0), target(TheISA::PCState(0)), inst(MinorDynInst::bubble()) { } BranchData( Reason reason_, + ThreadID thread_id, InstSeqNum new_stream_seq_num, InstSeqNum new_prediction_seq_num, TheISA::PCState target, MinorDynInstPtr inst_) : reason(reason_), + threadId(thread_id), newStreamSeqNum(new_stream_seq_num), newPredictionSeqNum(new_prediction_seq_num), target(target), @@ -258,8 +261,12 @@ class ForwardInstData /* : public ReportIF, public BubbleIF */ /** The number of insts slots that can be expected to be valid insts */ unsigned int numInsts; + /** Thread associated with these instructions */ + ThreadID threadId; + public: - explicit ForwardInstData(unsigned int width = 0); + explicit ForwardInstData(unsigned int width = 0, + ThreadID tid = InvalidThreadID); ForwardInstData(const ForwardInstData &src); diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc index 39b7f31f9..8c04e3949 100644 --- a/src/cpu/minor/pipeline.cc +++ b/src/cpu/minor/pipeline.cc @@ -187,9 +187,9 @@ Pipeline::getDataPort() } void -Pipeline::wakeupFetch() +Pipeline::wakeupFetch(ThreadID tid) { - execute.wakeupFetch(); + fetch1.wakeupFetch(tid); } bool @@ -212,6 +212,11 @@ void Pipeline::drainResume() { DPRINTF(Drain, "Drain resume\n"); + + for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { + fetch1.wakeupFetch(tid); + } + execute.drainResume(); } diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh index 2e1aa9921..9b6ca0d32 100644 --- a/src/cpu/minor/pipeline.hh +++ b/src/cpu/minor/pipeline.hh @@ -112,7 +112,7 @@ class Pipeline : public Ticked public: /** Wake up the Fetch unit. This is needed on thread activation esp. * after quiesce wakeup */ - void wakeupFetch(); + void wakeupFetch(ThreadID tid); /** Try to drain the CPU */ bool drain(); diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index 44fe2fcae..8f20c5ff9 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -261,7 +261,7 @@ quiesceSkip(ThreadContext *tc) EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent(); - Tick resume = curTick() + 1; + Tick resume = cpu->nextCycle() + 1; cpu->reschedule(quiesceEvent, resume, true); diff --git a/util/minorview/minor.pic b/util/minorview/minor.pic index 52731d66f..fd32a3ef3 100644 --- a/util/minorview/minor.pic +++ b/util/minorview/minor.pic @@ -115,9 +115,9 @@ macro predictionFrame: decoder=frame stripDir=vert dataElement=predictionSeqNum # name ::= ? alphanumeric name with dots ? # value ::= "(<char-except-">)*", <char-except-' '>* } -Fi: fetch2.inputBuffer inputBuffer decoder=lines -Di: decode.inputBuffer inputBuffer decoder=insts hideId=E -Ei: execute.inputBuffer inputBuffer stripDir=horiz decoder=insts border=mid +Fi: fetch2.inputBuffer0 inputBuffer decoder=lines +Di: decode.inputBuffer0 inputBuffer decoder=insts hideId=E +Ei: execute.inputBuffer0 inputBuffer stripDir=horiz decoder=insts border=mid F1: fetch1 streamFrame blankStrips=11 name="Fetch1" fe: fetch1 decoder=lines border=thin name="Line" F2: fetch2 predictionFrame blankStrips=11 name="Fetch2" @@ -146,9 +146,9 @@ f3: execute.fu.3 fu shorten=2 name=Div f4: execute.fu.4 fu shorten=2 name=Float f5: execute.fu.5 fu shorten=2 name=Mem f6: execute.fu.6 fu shorten=2 name=Misc -iq: execute.inFlightInsts fifo decoder=insts name="inFlightInsts" -im: execute.inFUMemInsts fifo decoder=insts name="inFU..." -sc: execute.scoreboard name="scoreboard" decoder=indexedCounts \ +iq: execute.inFlightInsts0 fifo decoder=insts name="inFlightInsts" +im: execute.inFUMemInsts0 fifo decoder=insts name="inFU..." +sc: execute.scoreboard0 name="scoreboard" decoder=indexedCounts \ dataElement=busy border=mid name="scoreboard" strips=38 stripelems=3 sa: activity dataElement=stages activity name="Stage activity" ac: activity dataElement=activity decoder=counts border=mid name="Activity" |