/* * Copyright (c) 2013-2014,2016 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall * not be construed as granting a license to any other intellectual * property including but not limited to intellectual property relating * to a hardware implementation of the functionality of the software * licensed hereunder. You may use the software subject to the license * terms below provided that you ensure that this notice is replicated * unmodified and in its entirety in all distributions of the software, * modified or unmodified, in source code or in binary form. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Andrew Bardsley */ #include "cpu/minor/fetch2.hh" #include #include "arch/decoder.hh" #include "arch/utility.hh" #include "cpu/minor/pipeline.hh" #include "cpu/pred/bpred_unit.hh" #include "debug/Branch.hh" #include "debug/Fetch.hh" #include "debug/MinorTrace.hh" namespace Minor { Fetch2::Fetch2(const std::string &name, MinorCPU &cpu_, MinorCPUParams ¶ms, Latch::Output inp_, Latch::Output branchInp_, Latch::Input predictionOut_, Latch::Input out_, std::vector> &next_stage_input_buffer) : Named(name), cpu(cpu_), inp(inp_), branchInp(branchInp_), predictionOut(predictionOut_), out(out_), nextStageReserve(next_stage_input_buffer), outputWidth(params.decodeInputWidth), processMoreThanOneInput(params.fetch2CycleInput), branchPredictor(*params.branchPred), fetchInfo(params.numThreads), threadPriority(0) { if (outputWidth < 1) fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth); if (params.fetch2InputBufferSize < 1) { fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name, params.fetch2InputBufferSize); } /* Per-thread input buffers */ for (ThreadID tid = 0; tid < params.numThreads; tid++) { inputBuffer.push_back( InputBuffer( name + ".inputBuffer" + std::to_string(tid), "lines", params.fetch2InputBufferSize)); } } const ForwardLineData * Fetch2::getInput(ThreadID tid) { /* Get a line from the inputBuffer to work with */ if (!inputBuffer[tid].empty()) { return &(inputBuffer[tid].front()); } else { return NULL; } } void Fetch2::popInput(ThreadID tid) { if (!inputBuffer[tid].empty()) { inputBuffer[tid].front().freeLine(); inputBuffer[tid].pop(); } fetchInfo[tid].inputIndex = 0; } void Fetch2::dumpAllInput(ThreadID tid) { DPRINTF(Fetch, "Dumping whole input buffer\n"); while (!inputBuffer[tid].empty()) popInput(tid); fetchInfo[tid].inputIndex = 0; } void Fetch2::updateBranchPrediction(const BranchData &branch) { MinorDynInstPtr inst = branch.inst; /* Don't even consider instructions we didn't try to predict or faults */ if (inst->isFault() || !inst->triedToPredict) return; switch (branch.reason) { case BranchData::NoBranch: /* No data to update */ break; case BranchData::Interrupt: /* Never try to predict interrupts */ break; case BranchData::SuspendThread: /* Don't need to act on suspends */ break; case BranchData::HaltFetch: /* Don't need to act on fetch wakeup */ break; case BranchData::BranchPrediction: /* Shouldn't happen. Fetch2 is the only source of * BranchPredictions */ break; case BranchData::UnpredictedBranch: /* Unpredicted branch or barrier */ DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst); branchPredictor.squash(inst->id.fetchSeqNum, branch.target, true, inst->id.threadId); // Update after squashing to accomodate O3CPU // using the branch prediction code. branchPredictor.update(inst->id.fetchSeqNum, inst->id.threadId); break; case BranchData::CorrectlyPredictedBranch: /* Predicted taken, was taken */ DPRINTF(Branch, "Branch predicted correctly inst: %s\n", *inst); branchPredictor.update(inst->id.fetchSeqNum, inst->id.threadId); break; case BranchData::BadlyPredictedBranch: /* Predicted taken, not taken */ DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst); branchPredictor.squash(inst->id.fetchSeqNum, branch.target /* Not used */, false, inst->id.threadId); // Update after squashing to accomodate O3CPU // using the branch prediction code. branchPredictor.update(inst->id.fetchSeqNum, inst->id.threadId); break; case BranchData::BadlyPredictedBranchTarget: /* Predicted taken, was taken but to a different target */ DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n", *inst, branch.target); branchPredictor.squash(inst->id.fetchSeqNum, branch.target, true, inst->id.threadId); break; } } void Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) { Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId]; TheISA::PCState inst_pc = inst->pc; assert(!inst->predictedTaken); /* Skip non-control/sys call instructions */ if (inst->staticInst->isControl() || inst->staticInst->isSyscall()) { /* Tried to predict */ inst->triedToPredict = true; DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst); if (branchPredictor.predict(inst->staticInst, inst->id.fetchSeqNum, inst_pc, inst->id.threadId)) { inst->predictedTaken = true; inst->predictedTarget = inst_pc; branch.target = inst_pc; } } else { DPRINTF(Branch, "Not attempting prediction for inst: %s\n", *inst); } /* If we predict taken, set branch and update sequence numbers */ if (inst->predictedTaken) { /* Update the predictionSeqNum and remember the streamSeqNum that it * was associated with */ thread.expectedStreamSeqNum = inst->id.streamSeqNum; BranchData new_branch = BranchData(BranchData::BranchPrediction, inst->id.threadId, inst->id.streamSeqNum, thread.predictionSeqNum + 1, inst->predictedTarget, inst); /* Mark with a new prediction number by the stream number of the * instruction causing the prediction */ thread.predictionSeqNum++; branch = new_branch; DPRINTF(Branch, "Branch predicted taken inst: %s target: %s" " new predictionSeqNum: %d\n", *inst, inst->predictedTarget, thread.predictionSeqNum); } } void Fetch2::evaluate() { /* Push input onto appropriate input buffer */ if (!inp.outputWire->isBubble()) inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire); ForwardInstData &insts_out = *out.inputWire; BranchData prediction; BranchData &branch_inp = *branchInp.outputWire; assert(insts_out.isBubble()); /* React to branches from Execute to update local branch prediction * structures */ updateBranchPrediction(branch_inp); /* If a branch arrives, don't try and do anything about it. Only * react to your own predictions */ if (branch_inp.isStreamChange()) { DPRINTF(Fetch, "Dumping all input as a stream changing branch" " has arrived\n"); dumpAllInput(branch_inp.threadId); fetchInfo[branch_inp.threadId].havePC = false; } assert(insts_out.isBubble()); /* Even when blocked, clear out input lines with the wrong * prediction sequence number */ for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { Fetch2ThreadInfo &thread = fetchInfo[tid]; thread.blocked = !nextStageReserve[tid].canReserve(); const ForwardLineData *line_in = getInput(tid); while (line_in && thread.expectedStreamSeqNum == line_in->id.streamSeqNum && thread.predictionSeqNum != line_in->id.predictionSeqNum) { DPRINTF(Fetch, "Discarding line %s" " due to predictionSeqNum mismatch (expected: %d)\n", line_in->id, thread.predictionSeqNum); popInput(tid); fetchInfo[tid].havePC = false; if (processMoreThanOneInput) { DPRINTF(Fetch, "Wrapping\n"); line_in = getInput(tid); } else { line_in = NULL; } } } ThreadID tid = getScheduledThread(); DPRINTF(Fetch, "Scheduled Thread: %d\n", tid); assert(insts_out.isBubble()); if (tid != InvalidThreadID) { Fetch2ThreadInfo &fetch_info = fetchInfo[tid]; const ForwardLineData *line_in = getInput(tid); unsigned int output_index = 0; /* Pack instructions into the output while we can. This may involve * using more than one input line. Note that lineWidth will be 0 * for faulting lines */ while (line_in && (line_in->isFault() || fetch_info.inputIndex < line_in->lineWidth) && /* More input */ output_index < outputWidth && /* More output to fill */ prediction.isBubble() /* No predicted branch */) { ThreadContext *thread = cpu.getContext(line_in->id.threadId); TheISA::Decoder *decoder = thread->getDecoderPtr(); /* Discard line due to prediction sequence number being wrong but * without the streamSeqNum number having changed */ bool discard_line = fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum && fetch_info.predictionSeqNum != line_in->id.predictionSeqNum; /* Set the PC if the stream changes. Setting havePC to false in * a previous cycle handles all other change of flow of control * issues */ bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum; if (!discard_line && (!fetch_info.havePC || set_pc)) { /* Set the inputIndex to be the MachInst-aligned offset * from lineBaseAddr of the new PC value */ fetch_info.inputIndex = (line_in->pc.instAddr() & BaseCPU::PCMask) - line_in->lineBaseAddr; DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x" " lineBaseAddr: 0x%x lineWidth: 0x%x\n", line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, line_in->lineWidth); fetch_info.pc = line_in->pc; fetch_info.havePC = true; decoder->reset(); } /* The generated instruction. Leave as NULL if no instruction * is to be packed into the output */ MinorDynInstPtr dyn_inst = NULL; if (discard_line) { /* Rest of line was from an older prediction in the same * stream */ DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)" " due to predictionSeqNum mismatch (expected: %d)\n", line_in->id, fetch_info.inputIndex, fetch_info.predictionSeqNum); } else if (line_in->isFault()) { /* Pack a fault as a MinorDynInst with ->fault set */ /* Make a new instruction and pick up the line, stream, * prediction, thread ids from the incoming line */ dyn_inst = new MinorDynInst(line_in->id); /* Fetch and prediction sequence numbers originate here */ dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; /* To complete the set, test that exec sequence number has * not been set */ assert(dyn_inst->id.execSeqNum == 0); dyn_inst->pc = fetch_info.pc; /* Pack a faulting instruction but allow other * instructions to be generated. (Fetch2 makes no * immediate judgement about streamSeqNum) */ dyn_inst->fault = line_in->fault; DPRINTF(Fetch, "Fault being passed output_index: " "%d: %s\n", output_index, dyn_inst->fault->name()); } else { uint8_t *line = line_in->line; TheISA::MachInst inst_word; /* The instruction is wholly in the line, can just * assign */ inst_word = TheISA::gtoh( *(reinterpret_cast (line + fetch_info.inputIndex))); if (!decoder->instReady()) { decoder->moreBytes(fetch_info.pc, line_in->lineBaseAddr + fetch_info.inputIndex, inst_word); DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n", line_in->lineBaseAddr + fetch_info.inputIndex); } /* Maybe make the above a loop to accomodate ISAs with * instructions longer than sizeof(MachInst) */ if (decoder->instReady()) { /* Make a new instruction and pick up the line, stream, * prediction, thread ids from the incoming line */ dyn_inst = new MinorDynInst(line_in->id); /* Fetch and prediction sequence numbers originate here */ dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; /* To complete the set, test that exec sequence number * has not been set */ assert(dyn_inst->id.execSeqNum == 0); /* Note that the decoder can update the given PC. * Remember not to assign it until *after* calling * decode */ StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc); dyn_inst->staticInst = decoded_inst; dyn_inst->pc = fetch_info.pc; DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst); // Collect some basic inst class stats if (decoded_inst->isLoad()) loadInstructions++; else if (decoded_inst->isStore()) storeInstructions++; else if (decoded_inst->isVector()) vecInstructions++; else if (decoded_inst->isFloating()) fpInstructions++; else if (decoded_inst->isInteger()) intInstructions++; DPRINTF(Fetch, "Instruction extracted from line %s" " lineWidth: %d output_index: %d inputIndex: %d" " pc: %s inst: %s\n", line_in->id, line_in->lineWidth, output_index, fetch_info.inputIndex, fetch_info.pc, *dyn_inst); #if THE_ISA == X86_ISA || THE_ISA == ARM_ISA /* In SE mode, it's possible to branch to a microop when * replaying faults such as page faults (or simply * intra-microcode branches in X86). Unfortunately, * as Minor has micro-op decomposition in a separate * pipeline stage from instruction decomposition, the * following advancePC (which may follow a branch with * microPC() != 0) *must* see a fresh macroop. This * kludge should be improved with an addition to PCState * but I offer it in this form for the moment * * X86 can branch within microops so we need to deal with * the case that, after a branch, the first un-advanced PC * may be pointing to a microop other than 0. Once * advanced, however, the microop number *must* be 0 */ fetch_info.pc.upc(0); fetch_info.pc.nupc(1); #endif /* Advance PC for the next instruction */ TheISA::advancePC(fetch_info.pc, decoded_inst); /* Predict any branches and issue a branch if * necessary */ predictBranch(dyn_inst, prediction); } else { DPRINTF(Fetch, "Inst not ready yet\n"); } /* Step on the pointer into the line if there's no * complete instruction waiting */ if (decoder->needMoreBytes()) { fetch_info.inputIndex += sizeof(TheISA::MachInst); DPRINTF(Fetch, "Updated inputIndex value PC: %s" " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n", line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, line_in->lineWidth); } } if (dyn_inst) { /* Step to next sequence number */ fetch_info.fetchSeqNum++; /* Correctly size the output before writing */ if (output_index == 0) { insts_out.resize(outputWidth); } /* Pack the generated dynamic instruction into the output */ insts_out.insts[output_index] = dyn_inst; output_index++; /* Output MinorTrace instruction info for * pre-microop decomposition macroops */ if (DTRACE(MinorTrace) && !dyn_inst->isFault() && dyn_inst->staticInst->isMacroop()) { dyn_inst->minorTraceInst(*this); } } /* Remember the streamSeqNum of this line so we can tell when * we change stream */ fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum; /* Asked to discard line or there was a branch or fault */ if (!prediction.isBubble() || /* The remains of a line with a prediction in it */ line_in->isFault() /* A line which is just a fault */) { DPRINTF(Fetch, "Discarding all input on branch/fault\n"); dumpAllInput(tid); fetch_info.havePC = false; line_in = NULL; } else if (discard_line) { /* Just discard one line, one's behind it may have new * stream sequence numbers. There's a DPRINTF above * for this event */ popInput(tid); fetch_info.havePC = false; line_in = NULL; } else if (fetch_info.inputIndex == line_in->lineWidth) { /* Got to end of a line, pop the line but keep PC * in case this is a line-wrapping inst. */ popInput(tid); line_in = NULL; } if (!line_in && processMoreThanOneInput) { DPRINTF(Fetch, "Wrapping\n"); line_in = getInput(tid); } } /* The rest of the output (if any) should already have been packed * with bubble instructions by insts_out's initialisation */ } if (tid == InvalidThreadID) { assert(insts_out.isBubble()); } /** Reserve a slot in the next stage and output data */ *predictionOut.inputWire = prediction; /* If we generated output, reserve space for the result in the next stage * and mark the stage as being active this cycle */ if (!insts_out.isBubble()) { /* Note activity of following buffer */ cpu.activityRecorder->activity(); insts_out.threadId = tid; nextStageReserve[tid].reserve(); } /* If we still have input to process and somewhere to put it, * mark stage as active */ for (ThreadID i = 0; i < cpu.numThreads; i++) { if (getInput(i) && nextStageReserve[i].canReserve()) { cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); break; } } /* Make sure the input (if any left) is pushed */ if (!inp.outputWire->isBubble()) inputBuffer[inp.outputWire->id.threadId].pushTail(); } inline ThreadID Fetch2::getScheduledThread() { /* Select thread via policy. */ std::vector priority_list; switch (cpu.threadPolicy) { case Enums::SingleThreaded: priority_list.push_back(0); break; case Enums::RoundRobin: priority_list = cpu.roundRobinPriority(threadPriority); break; case Enums::Random: priority_list = cpu.randomPriority(); break; default: panic("Unknown fetch policy"); } for (auto tid : priority_list) { if (getInput(tid) && !fetchInfo[tid].blocked) { threadPriority = tid; return tid; } } return InvalidThreadID; } bool Fetch2::isDrained() { for (const auto &buffer : inputBuffer) { if (!buffer.empty()) return false; } return (*inp.outputWire).isBubble() && (*predictionOut.inputWire).isBubble(); } void Fetch2::regStats() { using namespace Stats; intInstructions .name(name() + ".int_instructions") .desc("Number of integer instructions successfully decoded") .flags(total); fpInstructions .name(name() + ".fp_instructions") .desc("Number of floating point instructions successfully decoded") .flags(total); vecInstructions .name(name() + ".vec_instructions") .desc("Number of SIMD instructions successfully decoded") .flags(total); loadInstructions .name(name() + ".load_instructions") .desc("Number of memory load instructions successfully decoded") .flags(total); storeInstructions .name(name() + ".store_instructions") .desc("Number of memory store instructions successfully decoded") .flags(total); } void Fetch2::minorTrace() const { std::ostringstream data; if (fetchInfo[0].blocked) data << 'B'; else (*out.inputWire).reportData(data); MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n", fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str()); inputBuffer[0].minorTrace(); } }