From 7940c10ace28d5b93a61d4d278e6647e0c497149 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 2 Jun 2006 18:15:20 -0400 Subject: Fixes to get compiling to work. This is mainly fixing up some includes; changing functions within the XCs; changing MemReqPtrs to Requests or Packets where appropriate. Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports to work and handle responses properly. This check-in is so that the merge between m5 and newmem is no longer outstanding. src/SConscript: Need to include FU Pool for new CPU model. I'll try to figure out a cleaner way to handle this in the future. src/base/traceflags.py: Include new traces flags, fix up merge mess up. src/cpu/SConscript: Include the base_dyn_inst.cc as one of othe sources. Don't compile the Ozone CPU for now. src/cpu/base.cc: Remove an extra } from the merge. src/cpu/base_dyn_inst.cc: Fixes to make compiling work. Don't instantiate the OzoneCPU for now. src/cpu/base_dyn_inst.hh: src/cpu/o3/2bit_local_pred.cc: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/btb.hh: src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/free_list.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/regfile.hh: src/cpu/o3/sat_counter.hh: src/cpu/op_class.hh: src/cpu/ozone/cpu.hh: src/cpu/checker/cpu.cc: src/cpu/checker/cpu.hh: src/cpu/checker/exec_context.hh: src/cpu/checker/o3_cpu_builder.cc: src/cpu/ozone/cpu_impl.hh: src/mem/request.hh: src/cpu/o3/fu_pool.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/thread_state.hh: src/cpu/ozone/back_end.hh: src/cpu/ozone/dyn_inst.cc: src/cpu/ozone/dyn_inst.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/inorder_back_end.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/ozone_impl.hh: src/cpu/ozone/thread_state.hh: Fixes to get compiling to work. src/cpu/o3/alpha_cpu.hh: Fixes to get compiling to work. Float reg accessors have changed, as well as MemReqPtrs to RequestPtrs. src/cpu/o3/alpha_dyn_inst_impl.hh: Fixes to get compiling to work. Pass in the packet to the completeAcc function. Fix up syscall function. --HG-- rename : cpu/activity.cc => src/cpu/activity.cc rename : cpu/activity.hh => src/cpu/activity.hh rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh rename : cpu/ozone/inst_queue.cc => src/cpu/ozone/inst_queue.cc rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh rename : cpu/thread_state.hh => src/cpu/thread_state.hh rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8 --- src/cpu/ozone/front_end_impl.hh | 920 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 920 insertions(+) create mode 100644 src/cpu/ozone/front_end_impl.hh (limited to 'src/cpu/ozone/front_end_impl.hh') diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh new file mode 100644 index 000000000..ffbcf3340 --- /dev/null +++ b/src/cpu/ozone/front_end_impl.hh @@ -0,0 +1,920 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/ozone/front_end.hh" +#include "mem/mem_interface.hh" +#include "sim/byte_swap.hh" + +using namespace TheISA; + +template +FrontEnd::FrontEnd(Params *params) + : branchPred(params), + icacheInterface(params->icacheInterface), + instBufferSize(0), + maxInstBufferSize(params->maxInstBufferSize), + width(params->frontEndWidth), + freeRegs(params->numPhysicalRegs), + numPhysRegs(params->numPhysicalRegs), + serializeNext(false), + interruptPending(false) +{ + switchedOut = false; + + status = Idle; + + memReq = NULL; + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + assert(isPowerOf2(cacheBlkSize)); + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; + + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; +} + +template +std::string +FrontEnd::name() const +{ + return cpu->name() + ".frontend"; +} + +template +void +FrontEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + // @todo: Hardcoded for now. Allow this to be set by a latency. + fromCommit = comm->getWire(-1); +} + +template +void +FrontEnd::setXC(ExecContext *xc_ptr) +{ + xc = xc_ptr; +} + +template +void +FrontEnd::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + + fetchedBranches + .name(name() + ".fetchedBranches") + .desc("Number of fetched branches") + .prereq(fetchedBranches); + + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + + fetchIdleCycles + .name(name() + ".fetchIdleCycles") + .desc("Number of cycles fetch was idle") + .prereq(fetchIdleCycles); + + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + .prereq(fetchIcacheSquashes); + + fetchNisnDist + .init(/* base value */ 0, + /* last value */ width, + /* bucket size */ 1) + .name(name() + ".rateDist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf); + + idleRate + .name(name() + ".idleRate") + .desc("Percent of cycles fetch was idle") + .prereq(idleRate); + idleRate = fetchIdleCycles * 100 / cpu->numCycles; + + branchRate + .name(name() + ".branchRate") + .desc("Number of branch fetches per cycle") + .flags(Stats::total); + branchRate = fetchedBranches / cpu->numCycles; + + fetchRate + .name(name() + ".rate") + .desc("Number of inst fetches per cycle") + .flags(Stats::total); + fetchRate = fetchedInsts / cpu->numCycles; + + IFQCount + .name(name() + ".IFQ:count") + .desc("cumulative IFQ occupancy") + ; + + IFQFcount + .name(name() + ".IFQ:fullCount") + .desc("cumulative IFQ full count") + .flags(Stats::total) + ; + + IFQOccupancy + .name(name() + ".IFQ:occupancy") + .desc("avg IFQ occupancy (inst's)") + ; + IFQOccupancy = IFQCount / cpu->numCycles; + + IFQLatency + .name(name() + ".IFQ:latency") + .desc("avg IFQ occupant latency (cycle's)") + .flags(Stats::total) + ; + + IFQFullRate + .name(name() + ".IFQ:fullRate") + .desc("fraction of time (cycles) IFQ was full") + .flags(Stats::total); + ; + IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles; + + dispatchCountStat + .name(name() + ".DIS:count") + .desc("cumulative count of dispatched insts") + .flags(Stats::total) + ; + + dispatchedSerializing + .name(name() + ".DIS:serializingInsts") + .desc("count of serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchedTempSerializing + .name(name() + ".DIS:tempSerializingInsts") + .desc("count of temporary serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchSerializeStallCycles + .name(name() + ".DIS:serializeStallCycles") + .desc("count of cycles dispatch stalled for serializing inst") + .flags(Stats::total) + ; + + dispatchRate + .name(name() + ".DIS:rate") + .desc("dispatched insts per cycle") + .flags(Stats::total) + ; + dispatchRate = dispatchCountStat / cpu->numCycles; + + regIntFull + .name(name() + ".REG:int:full") + .desc("number of cycles where there were no INT registers") + ; + + regFpFull + .name(name() + ".REG:fp:full") + .desc("number of cycles where there were no FP registers") + ; + IFQLatency = IFQOccupancy / dispatchRate; + + branchPred.regStats(); +} + +template +void +FrontEnd::tick() +{ + if (switchedOut) + return; + + // @todo: Maybe I want to just have direct communication... + if (fromCommit->doneSeqNum) { + branchPred.update(fromCommit->doneSeqNum, 0); + } + + IFQCount += instBufferSize; + IFQFcount += instBufferSize == maxInstBufferSize; + + // Fetch cache line + if (status == IcacheMissComplete) { + cacheBlkValid = true; + + status = Running; + if (barrierInst) + status = SerializeBlocked; + if (freeRegs <= 0) + status = RenameBlocked; + checkBE(); + } else if (status == IcacheMissStall) { + DPRINTF(FE, "Still in Icache miss stall.\n"); + icacheStallCycles++; + return; + } + + if (status == RenameBlocked || status == SerializeBlocked || + status == TrapPending || status == BEBlocked) { + // Will cause a one cycle bubble between changing state and + // restarting. + DPRINTF(FE, "In blocked status.\n"); + + fetchBlockedCycles++; + + if (status == SerializeBlocked) { + dispatchSerializeStallCycles++; + } + updateStatus(); + return; + } else if (status == QuiescePending) { + DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); + return; + } else if (status != IcacheMissComplete) { + if (fetchCacheLineNextCycle) { + Fault fault = fetchCacheLine(); + if (fault != NoFault) { + handleFault(fault); + fetchFault = fault; + return; + } + fetchCacheLineNextCycle = false; + } + // If miss, stall until it returns. + if (status == IcacheMissStall) { + // Tell CPU to not tick me for now. + return; + } + } + + fetchCycles++; + + int num_inst = 0; + + // Otherwise loop and process instructions. + // One way to hack infinite width is to set width and maxInstBufferSize + // both really high. Inelegant, but probably will work. + while (num_inst < width && + instBufferSize < maxInstBufferSize) { + // Get instruction from cache line. + DynInstPtr inst = getInstFromCacheline(); + + if (!inst) { + // PC is no longer in the cache line, end fetch. + // Might want to check this at the end of the cycle so that + // there's no cycle lost to checking for a new cache line. + DPRINTF(FE, "Need to get new cache line\n"); + fetchCacheLineNextCycle = true; + break; + } + + processInst(inst); + + if (status == SerializeBlocked) { + break; + } + + // Possibly push into a time buffer that estimates the front end + // latency + instBuffer.push_back(inst); + ++instBufferSize; + ++num_inst; + +#if FULL_SYSTEM + if (inst->isQuiesce()) { + warn("%lli: Quiesce instruction encountered, halting fetch!", curTick); + status = QuiescePending; + break; + } +#endif + + if (inst->predTaken()) { + // Start over with tick? + break; + } else if (freeRegs <= 0) { + DPRINTF(FE, "Ran out of free registers to rename to!\n"); + status = RenameBlocked; + break; + } else if (serializeNext) { + break; + } + } + + fetchNisnDist.sample(num_inst); + checkBE(); + + DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free " + "Regs %i\n", num_inst, instBufferSize, freeRegs); +} + +template +Fault +FrontEnd::fetchCacheLine() +{ + // Read a cache line, based on the current PC. +#if FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + Fault fault = NoFault; + + if (interruptPending && flags == 0) { + return fault; + } + + // Align the fetch PC so it's at the start of a cache block. + Addr fetch_PC = icacheBlockAlignPC(PC); + + DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC); + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq = new MemReq(); + + memReq->asid = 0; + memReq->thread_num = 0; + memReq->data = new uint8_t[64]; + memReq->xc = xc; + memReq->cmd = Read; + memReq->reset(fetch_PC, cacheBlkSize, flags); + + // Translate the instruction request. + fault = cpu->translateInstReq(memReq); + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == NoFault) { +#if FULL_SYSTEM + if (cpu->system->memctrl->badaddr(memReq->paddr) || + memReq->flags & UNCACHEABLE) { + DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " + "misspeculating path!", + memReq->paddr); + return TheISA::genMachineCheckFault(); + } +#endif + + memReq->completionEvent = NULL; + + memReq->time = curTick; + fault = cpu->mem->read(memReq, cacheData); + + MemAccessResult res = icacheInterface->access(memReq); + + // If the cache missed then schedule an event to wake + // up this stage once the cache miss completes. + if (icacheInterface->doEvents() && res != MA_HIT) { + memReq->completionEvent = new ICacheCompletionEvent(memReq, this); + + status = IcacheMissStall; + + cacheBlkValid = false; + + DPRINTF(FE, "Cache miss.\n"); + } else { + DPRINTF(FE, "Cache hit.\n"); + + cacheBlkValid = true; + +// memcpy(cacheData, memReq->data, memReq->size); + } + } + + // Note that this will set the cache block PC a bit earlier than it should + // be set. + cacheBlkPC = fetch_PC; + + ++fetchedCacheLines; + + DPRINTF(FE, "Done fetching cache line.\n"); + + return fault; +} + +template +void +FrontEnd::processInst(DynInstPtr &inst) +{ + if (processBarriers(inst)) { + return; + } + + Addr inst_PC = inst->readPC(); + + if (!inst->isControl()) { + inst->setPredTarg(inst->readNextPC()); + } else { + fetchedBranches++; + if (branchPred.predict(inst, inst_PC, inst->threadNumber)) { + predictedBranches++; + } + } + + Addr next_PC = inst->readPredTarg(); + + DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC " + "%#x\n", inst->seqNum, inst_PC, next_PC); + +// inst->setNextPC(next_PC); + + // Not sure where I should set this + PC = next_PC; + + renameInst(inst); +} + +template +bool +FrontEnd::processBarriers(DynInstPtr &inst) +{ + if (serializeNext) { + inst->setSerializeBefore(); + serializeNext = false; + } else if (!inst->isSerializing() && + !inst->isIprAccess() && + !inst->isStoreConditional()) { + return false; + } + + if ((inst->isIprAccess() || inst->isSerializeBefore()) && + !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize before instruction encountered.\n"); + + if (!inst->isTempSerializeBefore()) { + dispatchedSerializing++; + inst->setSerializeHandled(); + } else { + dispatchedTempSerializing++; + } + + // Change status over to SerializeBlocked so that other stages know + // what this is blocked on. + status = SerializeBlocked; + + barrierInst = inst; + return true; + } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) + && !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize after instruction encountered.\n"); + + inst->setSerializeHandled(); + + dispatchedSerializing++; + + serializeNext = true; + return false; + } + return false; +} + +template +void +FrontEnd::handleFault(Fault &fault) +{ + DPRINTF(FE, "Fault at fetch, telling commit\n"); + + // We're blocked on the back end until it handles this fault. + status = TrapPending; + + // Get a sequence number. + InstSeqNum inst_seq = getAndIncrementInstSeq(); + // We will use a nop in order to carry the fault. + ExtMachInst ext_inst = TheISA::NoopMachInst; + + // Create a new DynInst from the dummy nop. + DynInstPtr instruction = new DynInst(ext_inst, PC, + PC+sizeof(MachInst), + inst_seq, cpu); + instruction->setPredTarg(instruction->readNextPC()); +// instruction->setThread(tid); + +// instruction->setASID(tid); + + instruction->setState(thread); + + instruction->traceData = NULL; + + instruction->fault = fault; + instruction->setCanIssue(); + instBuffer.push_back(instruction); + ++instBufferSize; +} + +template +void +FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, + const bool is_branch, const bool branch_taken) +{ + DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + if (fetchFault != NoFault) + fetchFault = NoFault; + + while (!instBuffer.empty() && + instBuffer.back()->seqNum > squash_num) { + DynInstPtr inst = instBuffer.back(); + + DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n", + inst->seqNum, inst->readPC()); + + inst->clearDependents(); + + instBuffer.pop_back(); + --instBufferSize; + + freeRegs+= inst->numDestRegs(); + } + + // Copy over rename table from the back end. + renameTable.copyFrom(backEnd->renameTable); + + PC = next_PC; + + // Update BP with proper information. + if (is_branch) { + branchPred.squash(squash_num, next_PC, branch_taken, 0); + } else { + branchPred.squash(squash_num, 0); + } + + // Clear the icache miss if it's outstanding. + if (status == IcacheMissStall && icacheInterface) { + DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + memReq = NULL; + } + + if (status == SerializeBlocked) { + assert(barrierInst->seqNum > squash_num); + barrierInst = NULL; + } + + // Unless this squash originated from the front end, we're probably + // in running mode now. + // Actually might want to make this latency dependent. + status = Running; + fetchCacheLineNextCycle = true; +} + +template +typename Impl::DynInstPtr +FrontEnd::getInst() +{ + if (instBufferSize == 0) { + return NULL; + } + + DynInstPtr inst = instBuffer.front(); + + instBuffer.pop_front(); + + --instBufferSize; + + dispatchCountStat++; + + return inst; +} + +template +void +FrontEnd::processCacheCompletion(MemReqPtr &req) +{ + DPRINTF(FE, "Processing cache completion\n"); + + // Do something here. + if (status != IcacheMissStall || + req != memReq || + switchedOut) { + DPRINTF(FE, "Previous fetch was squashed.\n"); + fetchIcacheSquashes++; + return; + } + + status = IcacheMissComplete; + +/* if (checkStall(tid)) { + fetchStatus[tid] = Blocked; + } else { + fetchStatus[tid] = IcacheMissComplete; + } +*/ +// memcpy(cacheData, memReq->data, memReq->size); + + // Reset the completion event to NULL. +// memReq->completionEvent = NULL; + memReq = NULL; +} + +template +void +FrontEnd::addFreeRegs(int num_freed) +{ + if (status == RenameBlocked && freeRegs + num_freed > 0) { + status = Running; + } + + DPRINTF(FE, "Adding %i freed registers\n", num_freed); + + freeRegs+= num_freed; + +// assert(freeRegs <= numPhysRegs); + if (freeRegs > numPhysRegs) + freeRegs = numPhysRegs; +} + +template +bool +FrontEnd::updateStatus() +{ + bool serialize_block = !backEnd->robEmpty() || instBufferSize; + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + bool ret_val = false; + + if (status == SerializeBlocked && !serialize_block) { + status = SerializeComplete; + ret_val = true; + } + + if (status == BEBlocked && !be_block) { + if (barrierInst) { + status = SerializeBlocked; + } else { + status = Running; + } + ret_val = true; + } + return ret_val; +} + +template +void +FrontEnd::checkBE() +{ + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + if (be_block) { + if (status == Running || status == Idle) { + status = BEBlocked; + } + } +} + +template +typename Impl::DynInstPtr +FrontEnd::getInstFromCacheline() +{ + if (status == SerializeComplete) { + DynInstPtr inst = barrierInst; + status = Running; + barrierInst = NULL; + inst->clearSerializeBefore(); + return inst; + } + + InstSeqNum inst_seq; + MachInst inst; + // @todo: Fix this magic number used here to handle word offset (and + // getting rid of PAL bit) + unsigned offset = (PC & cacheBlkMask) & ~3; + + // PC of inst is not in this cache block + if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) { + return NULL; + } + + ////////////////////////// + // Fetch one instruction + ////////////////////////// + + // Get a sequence number. + inst_seq = getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - sizeof(MachInst)); + + // Get the instruction from the array of the cache line. + inst = htog(*reinterpret_cast(&cacheData[offset])); + + ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), + inst_seq, cpu); + + instruction->setState(thread); + + DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", + inst_seq, instruction->readPC(), + instruction->staticInst->disassemble(PC)); + + instruction->traceData = + Trace::getInstRecord(curTick, xc, cpu, + instruction->staticInst, + instruction->readPC(), 0); + + // Increment stat of fetched instructions. + ++fetchedInsts; + + return instruction; +} + +template +void +FrontEnd::renameInst(DynInstPtr &inst) +{ + DynInstPtr src_inst = NULL; + int num_src_regs = inst->numSrcRegs(); + if (num_src_regs == 0) { + inst->setCanIssue(); + } else { + for (int i = 0; i < num_src_regs; ++i) { + src_inst = renameTable[inst->srcRegIdx(i)]; + + inst->setSrcInst(src_inst, i); + + DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", + inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); + + if (src_inst->isResultReady()) { + DPRINTF(FE, "Reg ready.\n"); + inst->markSrcRegReady(i); + } else { + DPRINTF(FE, "Adding to dependent list.\n"); + src_inst->addDependent(inst); + } + } + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + RegIndex idx = inst->destRegIdx(i); + + DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously " + "[sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum, + renameTable[idx]->seqNum); + + inst->setPrevDestInst(renameTable[idx], i); + + renameTable[idx] = inst; + --freeRegs; + } +} + +template +void +FrontEnd::wakeFromQuiesce() +{ + DPRINTF(FE, "Waking up from quiesce\n"); + // Hopefully this is safe + status = Running; +} + +template +void +FrontEnd::switchOut() +{ + switchedOut = true; + cpu->signalSwitched(); +} + +template +void +FrontEnd::doSwitchOut() +{ + memReq = NULL; + squash(0, 0); + instBuffer.clear(); + instBufferSize = 0; + status = Idle; +} + +template +void +FrontEnd::takeOverFrom(ExecContext *old_xc) +{ + assert(freeRegs == numPhysRegs); + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; + serializeNext = false; + barrierInst = NULL; + status = Running; + switchedOut = false; + interruptPending = false; +} + +template +void +FrontEnd::dumpInsts() +{ + cprintf("instBuffer size: %i\n", instBuffer.size()); + + InstBuffIt buff_it = instBuffer.begin(); + + for (int num = 0; buff_it != instBuffer.end(); num++) { + cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*buff_it)->readPC(), (*buff_it)->threadNumber, + (*buff_it)->seqNum, (*buff_it)->isIssued(), + (*buff_it)->isSquashed()); + buff_it++; + } +} + +template +FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) + : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +FrontEnd::ICacheCompletionEvent::process() +{ + frontEnd->processCacheCompletion(req); +} + +template +const char * +FrontEnd::ICacheCompletionEvent::description() +{ + return "ICache completion event"; +} -- cgit v1.2.3