summaryrefslogtreecommitdiff
path: root/cpu/beta_cpu/fetch_impl.hh
diff options
context:
space:
mode:
Diffstat (limited to 'cpu/beta_cpu/fetch_impl.hh')
-rw-r--r--cpu/beta_cpu/fetch_impl.hh599
1 files changed, 599 insertions, 0 deletions
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
new file mode 100644
index 000000000..90caf9ffe
--- /dev/null
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -0,0 +1,599 @@
+// Remove this later; used only for debugging.
+#define OPCODE(X) (X >> 26) & 0x3f
+
+
+#include "arch/alpha/byte_swap.hh"
+#include "cpu/exetrace.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "mem/mem_req.hh"
+#include "cpu/beta_cpu/fetch.hh"
+
+#include "sim/universe.hh"
+
+template<class Impl>
+SimpleFetch<Impl>::CacheCompletionEvent
+::CacheCompletionEvent(SimpleFetch *_fetch)
+ : Event(&mainEventQueue),
+ fetch(_fetch)
+{
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::CacheCompletionEvent::process()
+{
+ fetch->processCacheCompletion();
+}
+
+template<class Impl>
+const char *
+SimpleFetch<Impl>::CacheCompletionEvent::description()
+{
+ return "SimpleFetch cache completion event";
+}
+
+template<class Impl>
+SimpleFetch<Impl>::SimpleFetch(Params &params)
+ : //cacheCompletionEvent(this),
+ icacheInterface(params.icacheInterface),
+ branchPred(params),
+ decodeToFetchDelay(params.decodeToFetchDelay),
+ renameToFetchDelay(params.renameToFetchDelay),
+ iewToFetchDelay(params.iewToFetchDelay),
+ commitToFetchDelay(params.commitToFetchDelay),
+ fetchWidth(params.fetchWidth)
+{
+ // Set status to idle.
+ _status = Idle;
+
+ // Create a new memory request.
+ memReq = new MemReq();
+ // Not sure of this parameter. I think it should be based on the
+ // thread number.
+#ifndef FULL_SYSTEM
+ memReq->asid = params.asid;
+#else
+ memReq->asid = 0;
+#endif // FULL_SYSTEM
+ memReq->data = new uint8_t[64];
+
+ // Size of cache block.
+ cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+
+ // Create mask to get rid of offset bits.
+ cacheBlkMask = (cacheBlkSize - 1);
+
+ // Get the size of an instruction.
+ instSize = sizeof(MachInst);
+
+ // Create space to store a cache line.
+ cacheData = new uint8_t[cacheBlkSize];
+}
+
+template <class Impl>
+void
+SimpleFetch<Impl>::regStats()
+{
+ icacheStallCycles
+ .name(name() + ".icacheStallCycles")
+ .desc("Number of cycles fetch is stalled on an Icache miss")
+ .prereq(icacheStallCycles);
+
+ fetchedInsts
+ .name(name() + ".fetchedInsts")
+ .desc("Number of instructions fetch has processed")
+ .prereq(fetchedInsts);
+ predictedBranches
+ .name(name() + ".predictedBranches")
+ .desc("Number of branches that fetch has predicted taken")
+ .prereq(predictedBranches);
+ fetchCycles
+ .name(name() + ".fetchCycles")
+ .desc("Number of cycles fetch has run and was not squashing or"
+ " blocked")
+ .prereq(fetchCycles);
+ fetchSquashCycles
+ .name(name() + ".fetchSquashCycles")
+ .desc("Number of cycles fetch has spent squashing")
+ .prereq(fetchSquashCycles);
+ fetchBlockedCycles
+ .name(name() + ".fetchBlockedCycles")
+ .desc("Number of cycles fetch has spent blocked")
+ .prereq(fetchBlockedCycles);
+ fetchedCacheLines
+ .name(name() + ".fetchedCacheLines")
+ .desc("Number of cache lines fetched")
+ .prereq(fetchedCacheLines);
+
+ fetch_nisn_dist
+ .init(/* base value */ 0,
+ /* last value */ fetchWidth,
+ /* bucket size */ 1)
+ .name(name() + ".FETCH:rate_dist")
+ .desc("Number of instructions fetched each cycle (Total)")
+ .flags(Stats::pdf)
+ ;
+
+ branchPred.regStats();
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
+ cpu = cpu_ptr;
+ // This line will be removed eventually.
+ memReq->xc = cpu->xcBase();
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
+{
+ DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
+ timeBuffer = time_buffer;
+
+ // Create wires to get information from proper places in time buffer.
+ fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
+ fromRename = timeBuffer->getWire(-renameToFetchDelay);
+ fromIEW = timeBuffer->getWire(-iewToFetchDelay);
+ fromCommit = timeBuffer->getWire(-commitToFetchDelay);
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+{
+ DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
+ fetchQueue = fq_ptr;
+
+ // Create wire to write information to proper place in fetch queue.
+ toDecode = fetchQueue->getWire(0);
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::processCacheCompletion()
+{
+ DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");
+
+ // Only change the status if it's still waiting on the icache access
+ // to return.
+ // Can keep track of how many cache accesses go unused due to
+ // misspeculation here.
+ // How to handle an outstanding miss which gets cancelled due to squash,
+ // then a new icache miss gets scheduled?
+ if (_status == IcacheMissStall)
+ _status = IcacheMissComplete;
+}
+
+#if 0
+template <class Impl>
+inline void
+SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst)
+{
+ inst->setGlobalHist(branchPred.BPReadGlobalHist());
+}
+#endif
+
+template <class Impl>
+bool
+SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
+{
+ // Do branch prediction check here.
+ // A bit of a misnomer...next_PC is actually the current PC until
+ // this function updates it.
+ bool predict_taken;
+
+ if (!inst->isControl()) {
+ next_PC = next_PC + instSize;
+ inst->setPredTarg(next_PC);
+ return false;
+ }
+
+ predict_taken = branchPred.predict(inst, next_PC);
+
+ if (predict_taken) {
+ ++predictedBranches;
+ }
+
+ return predict_taken;
+}
+
+template <class Impl>
+Fault
+SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
+{
+ // Check if the instruction exists within the cache.
+ // If it does, then proceed on to read the instruction and the rest
+ // of the instructions in the cache line until either the end of the
+ // cache line or a predicted taken branch is encountered.
+
+#ifdef FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+
+ Fault fault = No_Fault;
+
+ // Align the fetch PC so it's at the start of a cache block.
+ fetch_PC = icacheBlockAlignPC(fetch_PC);
+
+ // Setup the memReq to do a read of the first isntruction's address.
+ // Set the appropriate read size and flags as well.
+ memReq->cmd = Read;
+ memReq->reset(fetch_PC, cacheBlkSize, flags);
+
+ // Translate the instruction request.
+ // Should this function be
+ // in the CPU class ? Probably...ITB/DTB should exist within the
+ // CPU.
+
+ fault = cpu->translateInstReq(memReq);
+
+ // In the case of faults, the fetch stage may need to stall and wait
+ // on what caused the fetch (ITB or Icache miss).
+
+ // If translation was successful, attempt to read the first
+ // instruction.
+ if (fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
+ fault = cpu->mem->read(memReq, cacheData);
+ // This read may change when the mem interface changes.
+
+ fetchedCacheLines++;
+ }
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (icacheInterface && fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
+ memReq->completionEvent = NULL;
+
+ memReq->time = curTick;
+
+ MemAccessResult result = icacheInterface->access(memReq);
+
+ // If the cache missed (in this model functional and timing
+ // memories are different), then schedule an event to wake
+ // up this stage once the cache miss completes.
+ if (result != MA_HIT && icacheInterface->doEvents()) {
+ memReq->completionEvent = new CacheCompletionEvent(this);
+// lastIcacheStall = curTick;
+
+ // How does current model work as far as individual
+ // stages scheduling/unscheduling?
+ // Perhaps have only the main CPU scheduled/unscheduled,
+ // and have it choose what stages to run appropriately.
+
+ DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
+ _status = IcacheMissStall;
+ }
+ }
+
+ return fault;
+}
+
+template <class Impl>
+inline void
+SimpleFetch<Impl>::doSquash(const Addr &new_PC)
+{
+ DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
+
+ cpu->setNextPC(new_PC + instSize);
+ cpu->setPC(new_PC);
+
+ // Clear the icache miss if it's outstanding.
+ if (_status == IcacheMissStall && icacheInterface) {
+ DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
+ // @todo: Use an actual thread number here.
+ icacheInterface->squash(0);
+ }
+
+ _status = Squashing;
+
+ ++fetchSquashCycles;
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
+ const InstSeqNum &seq_num)
+{
+ DPRINTF(Fetch, "Fetch: Squashing from decode.\n");
+
+ doSquash(new_PC);
+
+ // Tell the CPU to remove any instructions that are in flight between
+ // fetch and decode.
+ cpu->removeInstsUntil(seq_num);
+
+}
+
+template <class Impl>
+void
+SimpleFetch<Impl>::squash(const Addr &new_PC)
+{
+ DPRINTF(Fetch, "Fetch: Squash from commit.\n");
+
+ doSquash(new_PC);
+
+ // Tell the CPU to remove any instructions that are not in the ROB.
+ cpu->removeInstsNotInROB();
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::tick()
+{
+ // Check squash signals from commit.
+ if (fromCommit->commitInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from commit.\n");
+
+ // In any case, squash.
+ squash(fromCommit->commitInfo.nextPC);
+
+ // Also check if there's a mispredict that happened.
+ if (fromCommit->commitInfo.branchMispredict) {
+ branchPred.squash(fromCommit->commitInfo.doneSeqNum,
+ fromCommit->commitInfo.nextPC,
+ fromCommit->commitInfo.branchTaken);
+ } else {
+ branchPred.squash(fromCommit->commitInfo.doneSeqNum);
+ }
+
+ return;
+ } else if (fromCommit->commitInfo.doneSeqNum) {
+ // Update the branch predictor if it wasn't a squashed instruction
+ // that was braodcasted.
+ branchPred.update(fromCommit->commitInfo.doneSeqNum);
+ }
+
+ // Check ROB squash signals from commit.
+ if (fromCommit->commitInfo.robSquashing) {
+ DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
+
+ // Continue to squash.
+ _status = Squashing;
+
+ ++fetchSquashCycles;
+ return;
+ }
+
+ // Check squash signals from decode.
+ if (fromDecode->decodeInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from decode.\n");
+
+ // Update the branch predictor.
+ if (fromDecode->decodeInfo.branchMispredict) {
+ branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
+ fromDecode->decodeInfo.nextPC,
+ fromDecode->decodeInfo.branchTaken);
+ } else {
+ branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
+ }
+
+ if (_status != Squashing) {
+ // Squash unless we're already squashing?
+ squashFromDecode(fromDecode->decodeInfo.nextPC,
+ fromDecode->decodeInfo.doneSeqNum);
+ return;
+ }
+ }
+
+ // Check if any of the stall signals are high.
+ if (fromDecode->decodeInfo.stall ||
+ fromRename->renameInfo.stall ||
+ fromIEW->iewInfo.stall ||
+ fromCommit->commitInfo.stall)
+ {
+ // Block stage, regardless of current status.
+
+ DPRINTF(Fetch, "Fetch: Stalling stage.\n");
+ DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
+ "Commit: %i\n",
+ fromDecode->decodeInfo.stall,
+ fromRename->renameInfo.stall,
+ fromIEW->iewInfo.stall,
+ fromCommit->commitInfo.stall);
+
+ _status = Blocked;
+
+ ++fetchBlockedCycles;
+ return;
+ } else if (_status == Blocked) {
+ // Unblock stage if status is currently blocked and none of the
+ // stall signals are being held high.
+ _status = Running;
+
+ ++fetchBlockedCycles;
+ return;
+ }
+
+ // If fetch has reached this point, then there are no squash signals
+ // still being held high. Check if fetch is in the squashing state;
+ // if so, fetch can switch to running.
+ // Similarly, there are no blocked signals still being held high.
+ // Check if fetch is in the blocked state; if so, fetch can switch to
+ // running.
+ if (_status == Squashing) {
+ DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");
+
+ // Switch status to running
+ _status = Running;
+
+ ++fetchSquashCycles;
+ } else if (_status != IcacheMissStall) {
+ DPRINTF(Fetch, "Fetch: Running stage.\n");
+
+ ++fetchCycles;
+
+ fetch();
+ }
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::fetch()
+{
+ //////////////////////////////////////////
+ // Start actual fetch
+ //////////////////////////////////////////
+
+ // The current PC.
+ Addr fetch_PC = cpu->readPC();
+
+ // Fault code for memory access.
+ Fault fault = No_Fault;
+
+ // If returning from the delay of a cache miss, then update the status
+ // to running, otherwise do the cache access. Possibly move this up
+ // to tick() function.
+ if (_status == IcacheMissComplete) {
+ DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");
+
+ // Reset the completion event to NULL.
+ memReq->completionEvent = NULL;
+
+ _status = Running;
+ } else {
+ DPRINTF(Fetch, "Fetch: Attempting to translate and read "
+ "instruction, starting at PC %08p.\n",
+ fetch_PC);
+
+ fault = fetchCacheLine(fetch_PC);
+ }
+
+ // If we had a stall due to an icache miss, then return. It'd
+ // be nicer if this were handled through the kind of fault that
+ // is returned by the function.
+ if (_status == IcacheMissStall) {
+ return;
+ }
+
+ // As far as timing goes, the CPU will need to send an event through
+ // the MemReq in order to be woken up once the memory access completes.
+ // Probably have a status on a per thread basis so each thread can
+ // block independently and be woken up independently.
+
+ Addr next_PC = fetch_PC;
+ InstSeqNum inst_seq;
+ MachInst inst;
+ unsigned offset = fetch_PC & cacheBlkMask;
+ unsigned fetched;
+
+ if (fault == No_Fault) {
+ // If the read of the first instruction was successful, then grab the
+ // instructions from the rest of the cache line and put them into the
+ // queue heading to decode.
+
+ DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
+
+ //////////////////////////
+ // Fetch first instruction
+ //////////////////////////
+
+ // Need to keep track of whether or not a predicted branch
+ // ended this fetch block.
+ bool predicted_branch = false;
+
+ for (fetched = 0;
+ offset < cacheBlkSize &&
+ fetched < fetchWidth &&
+ !predicted_branch;
+ ++fetched)
+ {
+
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
+
+ // Make sure this is a valid index.
+ assert(offset <= cacheBlkSize - instSize);
+
+ // Get the instruction from the array of the cache line.
+ inst = htoa(*reinterpret_cast<MachInst *>
+ (&cacheData[offset]));
+
+ // Create a new DynInst from the instruction fetched.
+ DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
+ inst_seq, cpu);
+
+ DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
+ inst_seq, instruction->readPC());
+
+ DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
+ OPCODE(inst));
+
+ instruction->traceData =
+ Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
+ instruction->staticInst,
+ instruction->readPC(), 0);
+
+ predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
+
+ // Add instruction to the CPU's list of instructions.
+ cpu->addInst(instruction);
+
+ // Write the instruction to the first slot in the queue
+ // that heads to decode.
+ toDecode->insts[fetched] = instruction;
+
+ toDecode->size++;
+
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
+
+ // Move to the next instruction, unless we have a branch.
+ fetch_PC = next_PC;
+
+ offset+= instSize;
+ }
+
+ fetch_nisn_dist.sample(fetched);
+ }
+
+ // Now that fetching is completed, update the PC to signify what the next
+ // cycle will be. Might want to move this to the beginning of this
+ // function so that the PC updates at the beginning of everything.
+ // Or might want to leave setting the PC to the main CPU, with fetch
+ // only changing the nextPC (will require correct determination of
+ // next PC).
+ if (fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
+ cpu->setPC(next_PC);
+ cpu->setNextPC(next_PC + instSize);
+ } else {
+ // If the issue was an icache miss, then we can just return and
+ // wait until it is handled.
+ if (_status == IcacheMissStall) {
+ return;
+ }
+
+ // Handle the fault.
+ // This stage will not be able to continue until all the ROB
+ // slots are empty, at which point the fault can be handled.
+ // The only other way it can wake up is if a squash comes along
+ // and changes the PC. Not sure how to handle that case...perhaps
+ // have it handled by the upper level CPU class which peeks into the
+ // time buffer and sees if a squash comes along, in which case it
+ // changes the status.
+
+ DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");
+
+ _status = Blocked;
+#ifdef FULL_SYSTEM
+// cpu->trap(fault);
+ // Send a signal to the ROB indicating that there's a trap from the
+ // fetch stage that needs to be handled. Need to indicate that
+ // there's a fault, and the fault type.
+#else // !FULL_SYSTEM
+ fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
+#endif // FULL_SYSTEM
+ }
+}