Diffstat (limited to 'cpu/beta_cpu/fetch_impl.hh')
-rw-r--r--  cpu/beta_cpu/fetch_impl.hh  555
1 file changed, 555 insertions(+), 0 deletions(-)
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
new file mode 100644
index 000000000..918d2dad2
--- /dev/null
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -0,0 +1,555 @@
+// Todo: Rewrite this. Add in branch prediction. Fix up squashing from decode
+// so that only the proper instructions are killed. This will probably require
+// changing the CPU's instList functions to take a seqNum instead of a dyninst.
+// With the probe path, it should be possible to specify the size of data to
+// fetch, and thus get a full cache line.
+
+// Remove this later.
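+// Extracts the 6-bit opcode field (the top bits, 31:26, of a 32-bit
+// instruction word).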
+#define OPCODE(X) (((X) >> 26) & 0x3f)
+
+#include "cpu/exetrace.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "mem/mem_req.hh"
+#include "cpu/beta_cpu/fetch.hh"
+
+#include "sim/universe.hh"
+
+template<class Impl>
+SimpleFetch<Impl>::CacheCompletionEvent
+::CacheCompletionEvent(SimpleFetch *_fetch)
+ : Event(&mainEventQueue),
+ fetch(_fetch)
+{
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::CacheCompletionEvent::process()
+{
+ fetch->processCacheCompletion();
+}
+
+template<class Impl>
+const char *
+SimpleFetch<Impl>::CacheCompletionEvent::description()
+{
+ return "SimpleFetch cache completion event";
+}
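+
+// Note: fetch never schedules this event directly. On an icache miss, fetch()
+// points memReq->completionEvent at it, and the memory system is expected to
+// schedule the event once the miss returns, which in turn calls
+// processCacheCompletion().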
+
+template<class Impl>
+SimpleFetch<Impl>::SimpleFetch(Params &params)
+ : cacheCompletionEvent(this),
+ icacheInterface(params.icacheInterface),
+ decodeToFetchDelay(params.decodeToFetchDelay),
+ renameToFetchDelay(params.renameToFetchDelay),
+ iewToFetchDelay(params.iewToFetchDelay),
+ commitToFetchDelay(params.commitToFetchDelay),
+ fetchWidth(params.fetchWidth),
+ inst(0)
+{
+ // Set status to idle.
+ _status = Idle;
+
+ // Create a new memory request.
+ memReq = new MemReq();
+ // Not sure of this parameter. I think it should be based on the
+ // thread number.
+#ifndef FULL_SYSTEM
+ memReq->asid = params.asid;
+#else
+ memReq->asid = 0;
+#endif // FULL_SYSTEM
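+    // Buffer for the fetched data, sized here for a full 64-byte cache block
+    // (matching the default block size used below).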
+ memReq->data = new uint8_t[64];
+
+ // Size of cache block.
+ blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+
+    // Create mask to extract the offset bits within a cache block.
+    cacheBlockMask = blkSize - 1;
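+    // For example, with a 64-byte block the mask is 0x3f, so PC &
+    // cacheBlockMask yields the byte offset of the PC within its cache block.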
+
+ // Get the size of an instruction.
+ instSize = sizeof(MachInst);
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
+ cpu = cpu_ptr;
+ // This line will be removed eventually.
+ memReq->xc = cpu->xcBase();
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
+{
+ DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
+ timeBuffer = time_buffer;
+
+ // Create wires to get information from proper places in time buffer.
+ fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
+ fromRename = timeBuffer->getWire(-renameToFetchDelay);
+ fromIEW = timeBuffer->getWire(-iewToFetchDelay);
+ fromCommit = timeBuffer->getWire(-commitToFetchDelay);
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+{
+ DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
+ fetchQueue = fq_ptr;
+
+ // Create wire to write information to proper place in fetch queue.
+ toDecode = fetchQueue->getWire(0);
+}
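+
+// The enclosing CPU is expected to call the three setter functions above to
+// wire this stage up before simulation begins. A hypothetical call site (the
+// member names here are assumptions, not defined in this file) might look
+// like:
+//
+//     fetch.setCPU(this);
+//     fetch.setTimeBuffer(&timeBuffer);
+//     fetch.setFetchQueue(&fetchQueue);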
+
+template<class Impl>
+void
+SimpleFetch<Impl>::processCacheCompletion()
+{
+ DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");
+
+ // Only change the status if it's still waiting on the icache access
+ // to return.
+ // Can keep track of how many cache accesses go unused due to
+ // misspeculation here.
+ // How to handle an outstanding miss which gets cancelled due to squash,
+ // then a new icache miss gets scheduled?
+ if (_status == IcacheMissStall)
+ _status = IcacheMissComplete;
+}
+
+// Note that in SimpleFetch<>, the template parameters will most likely have
+// to be provided to the BP and BTB.
+template<class Impl>
+void
+SimpleFetch<Impl>::squash(Addr new_PC)
+{
+ DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
+ cpu->setNextPC(new_PC + instSize);
+ cpu->setPC(new_PC);
+
+ _status = Squashing;
+
+    // Clear out the instructions that are no longer valid.
+    // Actually, it may be slightly unrealistic to kill instructions that are
+    // in flight between stages like this. Perhaps the next stage should just
+    // ignore those instructions instead. In the cycle where fetch is
+    // returning from squashing, the other stages can simply ignore the inputs
+    // for that cycle.
+
+ // Tell the CPU to remove any instructions that aren't currently
+ // in the ROB (instructions in flight that were killed).
+ cpu->removeInstsNotInROB();
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::tick()
+{
+#if 0
+ if (fromCommit->commitInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from commit.\n");
+
+ // In any case, squash.
+ squash(fromCommit->commitInfo.nextPC);
+ return;
+ }
+
+ if (fromDecode->decodeInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from decode.\n");
+
+ // Squash unless we're already squashing?
+ squash(fromDecode->decodeInfo.nextPC);
+ return;
+ }
+
+ if (fromCommit->commitInfo.robSquashing) {
+ DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
+
+ // Continue to squash.
+ _status = Squashing;
+ return;
+ }
+
+ if (fromDecode->decodeInfo.stall ||
+ fromRename->renameInfo.stall ||
+ fromIEW->iewInfo.stall ||
+ fromCommit->commitInfo.stall)
+ {
+ DPRINTF(Fetch, "Fetch: Stalling stage.\n");
+ DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
+ "Commit: %i\n",
+ fromDecode->decodeInfo.stall,
+ fromRename->renameInfo.stall,
+ fromIEW->iewInfo.stall,
+ fromCommit->commitInfo.stall);
+ // What to do if we're already in an icache stall?
+ }
+#endif
+
+ if (_status != Blocked &&
+ _status != Squashing &&
+ _status != IcacheMissStall) {
+ DPRINTF(Fetch, "Fetch: Running stage.\n");
+
+ fetch();
+ } else if (_status == Blocked) {
+ // If still being told to stall, do nothing.
+ if (fromDecode->decodeInfo.stall ||
+ fromRename->renameInfo.stall ||
+ fromIEW->iewInfo.stall ||
+ fromCommit->commitInfo.stall)
+ {
+ DPRINTF(Fetch, "Fetch: Stalling stage.\n");
+ DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
+ "Commit: %i\n",
+ fromDecode->decodeInfo.stall,
+ fromRename->renameInfo.stall,
+ fromIEW->iewInfo.stall,
+ fromCommit->commitInfo.stall);
+ } else {
+
+ DPRINTF(Fetch, "Fetch: Done blocking.\n");
+ _status = Running;
+ }
+
+ if (fromCommit->commitInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from commit.\n");
+ squash(fromCommit->commitInfo.nextPC);
+ return;
+ } else if (fromDecode->decodeInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from decode.\n");
+ squash(fromDecode->decodeInfo.nextPC);
+ return;
+ } else if (fromCommit->commitInfo.robSquashing) {
+ DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
+ _status = Squashing;
+ return;
+ }
+ } else if (_status == Squashing) {
+ // If there are no squash signals then change back to running.
+ // Note that when a squash starts happening, commitInfo.squash will
+ // be high. But if the squash is still in progress, then only
+ // commitInfo.robSquashing will be high.
+ if (!fromCommit->commitInfo.squash &&
+ !fromCommit->commitInfo.robSquashing) {
+
+ DPRINTF(Fetch, "Fetch: Done squashing.\n");
+ _status = Running;
+ } else if (fromCommit->commitInfo.squash) {
+ // If there's a new squash, then start squashing again.
+ squash(fromCommit->commitInfo.nextPC);
+ } else {
+ // Purely a debugging statement.
+ DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
+ }
+ }
+
+}
+
+template<class Impl>
+void
+SimpleFetch<Impl>::fetch()
+{
+ //////////////////////////////////////////
+ // Check backwards communication
+ //////////////////////////////////////////
+
+ // If branch prediction is incorrect, squash any instructions,
+ // update PC, and do not fetch anything this cycle.
+
+ // Might want to put all the PC changing stuff in one area.
+ // Normally should also check here to see if there is branch
+ // misprediction info to update with.
+ if (fromCommit->commitInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from commit.\n");
+ squash(fromCommit->commitInfo.nextPC);
+ return;
+ } else if (fromDecode->decodeInfo.squash) {
+ DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
+ "from decode.\n");
+ squash(fromDecode->decodeInfo.nextPC);
+ return;
+ } else if (fromCommit->commitInfo.robSquashing) {
+ DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
+ _status = Squashing;
+ return;
+ }
+
+ // If being told to stall, do nothing.
+ if (fromDecode->decodeInfo.stall ||
+ fromRename->renameInfo.stall ||
+ fromIEW->iewInfo.stall ||
+ fromCommit->commitInfo.stall)
+ {
+ DPRINTF(Fetch, "Fetch: Stalling stage.\n");
+ DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
+ "Commit: %i\n",
+ fromDecode->decodeInfo.stall,
+ fromRename->renameInfo.stall,
+ fromIEW->iewInfo.stall,
+ fromCommit->commitInfo.stall);
+ _status = Blocked;
+ return;
+ }
+
+ //////////////////////////////////////////
+ // Start actual fetch
+ //////////////////////////////////////////
+
+ // If nothing else outstanding, attempt to read instructions.
+
+#ifdef FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
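+    // (While in PAL mode the fetch address is presumably already physical,
+    // so the PHYSICAL flag tells the translation below to treat it as such.)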
+
+ // The current PC.
+ Addr PC = cpu->readPC();
+
+ // Fault code for memory access.
+ Fault fault = No_Fault;
+
+ // If returning from the delay of a cache miss, then update the status
+ // to running, otherwise do the cache access.
+ if (_status == IcacheMissComplete) {
+ DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");
+
+ // Reset the completion event to NULL.
+ memReq->completionEvent = NULL;
+
+ _status = Running;
+ } else {
+ DPRINTF(Fetch, "Fetch: Attempting to translate and read "
+ "instruction, starting at PC %08p.\n",
+ PC);
+
+ // Otherwise check if the instruction exists within the cache.
+ // If it does, then proceed on to read the instruction and the rest
+ // of the instructions in the cache line until either the end of the
+ // cache line or a predicted taken branch is encountered.
+ // Note that this simply checks if the first instruction exists
+ // within the cache, assuming the rest of the cache line also exists
+ // within the cache.
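+        // (The per-instruction reads in the loop further below go through the
+        // functional memory only; no additional timing accesses are made for
+        // the rest of the line.)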
+
+        // Set up the memReq to do a read of the first instruction's address.
+        // Set the appropriate read size and flags as well.
+ memReq->cmd = Read;
+ memReq->reset(PC, instSize, flags);
+
+        // Translate the instruction request.
+        // Should this function be in the CPU class? Probably... the ITB/DTB
+        // should exist within the CPU.
+
+ fault = cpu->translateInstReq(memReq);
+
+ // In the case of faults, the fetch stage may need to stall and wait
+ // on what caused the fetch (ITB or Icache miss).
+
+ // If translation was successful, attempt to read the first
+ // instruction.
+ if (fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
+ fault = cpu->mem->read(memReq, inst);
+ // This read may change when the mem interface changes.
+ }
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (icacheInterface && fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
+ memReq->completionEvent = NULL;
+
+ memReq->time = curTick;
+
+ MemAccessResult result = icacheInterface->access(memReq);
+
+ // If the cache missed (in this model functional and timing
+ // memories are different), then schedule an event to wake
+ // up this stage once the cache miss completes.
+ if (result != MA_HIT && icacheInterface->doEvents()) {
+ memReq->completionEvent = &cacheCompletionEvent;
+// lastIcacheStall = curTick;
+
+ // How does current model work as far as individual
+ // stages scheduling/unscheduling?
+ // Perhaps have only the main CPU scheduled/unscheduled,
+ // and have it choose what stages to run appropriately.
+
+ DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
+ _status = IcacheMissStall;
+ return;
+ }
+ }
+ }
+
+ // As far as timing goes, the CPU will need to send an event through
+ // the MemReq in order to be woken up once the memory access completes.
+ // Probably have a status on a per thread basis so each thread can
+ // block independently and be woken up independently.
+
+ Addr next_PC = 0;
+ InstSeqNum inst_seq;
+
+ // If the read of the first instruction was successful, then grab the
+ // instructions from the rest of the cache line and put them into the
+ // queue heading to decode.
+ if (fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
+
+ // Need to keep track of whether or not a predicted branch
+ // ended this fetch block.
+ bool predicted_branch = false;
+
+ // Might want to keep track of various stats.
+// numLinesFetched++;
+
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
+
+ // Because the first instruction was already fetched, create the
+ // DynInst and put it into the queue to decode.
+ DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq,
+ cpu);
+ DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
+ instruction, instruction->readPC());
+ DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
+ OPCODE(inst));
+
+ instruction->traceData =
+ Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
+ instruction->staticInst,
+ instruction->readPC(), 0);
+
+ cpu->addInst(instruction);
+
+ // Write the instruction to the first slot in the queue
+ // that heads to decode.
+ toDecode->insts[0] = instruction;
+
+ // Now update the PC to fetch the next instruction in the cache
+ // line.
+ PC = PC + instSize;
+
+ // Obtain the index into the cache line by getting only the low
+ // order bits.
+ int line_index = PC & cacheBlockMask;
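+        // For example, with a 64-byte block and 4-byte instructions, a PC of
+        // 0x108 gives a line_index of 0x08, leaving room for up to 14 more
+        // instructions from this block (subject to fetchWidth).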
+
+ // Take instructions and put them into the queue heading to decode.
+ // Then read the next instruction in the cache line. Continue
+ // until either all of the fetch bandwidth is used (not an issue for
+ // non-SMT), or the end of the cache line is reached. Note that
+ // this assumes standard cachelines, and not something like a trace
+ // cache where lines might not end at cache-line size aligned
+ // addresses.
+ // @todo: Fix the horrible amount of translates/reads that must
+ // take place due to reading an entire cacheline. Ideally it
+ // should all take place at once, return an array of binary
+ // instructions, which can then be used to get all the instructions
+ // needed. Figure out if I can roll it back into one loop.
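+        // As a rough sketch, a batched version might reset memReq once to
+        // cover the rest of the line (e.g. memReq->reset(PC, blkSize -
+        // line_index, flags)), do a single translate/read into a local buffer,
+        // and then pull each MachInst out of that buffer; whether the memory
+        // interface supports a block-sized read here is an assumption.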
+ for (int fetched = 1;
+ line_index < blkSize && fetched < fetchWidth;
+ line_index+=instSize, ++fetched)
+ {
+ // Reset the mem request to setup the read of the next
+ // instruction.
+ memReq->reset(PC, instSize, flags);
+
+ // Translate the instruction request.
+ fault = cpu->translateInstReq(memReq);
+
+ // Read instruction.
+ if (fault == No_Fault) {
+ fault = cpu->mem->read(memReq, inst);
+ }
+
+ // Check if there was a fault.
+ if (fault != No_Fault) {
+ panic("Fetch: Read of instruction faulted when it should "
+ "succeed; most likely exceeding cache line.\n");
+ }
+
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
+
+ // Create the actual DynInst. Parameters are:
+ // DynInst(instruction, PC, predicted PC, CPU pointer).
+ // Because this simple model has no branch prediction, the
+ // predicted PC will simply be PC+sizeof(MachInst).
+ // Update to actually use a branch predictor to predict the
+ // target in the future.
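+            // For example, with 4-byte instructions an instruction at PC 0x120
+            // gets a predicted next PC of 0x124.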
+ DynInst *instruction = new DynInst(inst, PC, PC+instSize,
+ inst_seq, cpu);
+            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
+                    inst_seq, instruction->readPC());
+            DPRINTF(Fetch, "Fetch: Instruction opcode is: %#x\n",
+                    OPCODE(inst));
+
+ cpu->addInst(instruction);
+
+ // Write the instruction to the proper slot in the queue
+ // that heads to decode.
+ toDecode->insts[fetched] = instruction;
+
+ // Might want to keep track of various stats.
+// numInstsFetched++;
+
+ // Now update the PC to fetch the next instruction in the cache
+ // line.
+ PC = PC + instSize;
+ }
+
+        // If no branch was predicted taken, then the next PC is simply the
+        // fall-through path. This simple model always predicts not taken.
+ if (!predicted_branch) {
+ next_PC = PC;
+ }
+ }
+
+    // Now that fetching is completed, update the PC to reflect where fetch
+    // will resume next cycle. Might want to move this to the beginning of this
+    // function so that the PC updates at the beginning of everything.
+    // Or might want to leave setting the PC to the main CPU, with fetch
+    // only changing the nextPC (which will require correctly determining the
+    // next PC).
+ if (fault == No_Fault) {
+ DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
+ cpu->setPC(next_PC);
+ cpu->setNextPC(next_PC + instSize);
+ } else {
+ // Handle the fault.
+ // This stage will not be able to continue until all the ROB
+ // slots are empty, at which point the fault can be handled.
+ // The only other way it can wake up is if a squash comes along
+ // and changes the PC. Not sure how to handle that case...perhaps
+ // have it handled by the upper level CPU class which peeks into the
+ // time buffer and sees if a squash comes along, in which case it
+ // changes the status.
+
+ DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");
+
+ _status = Blocked;
+#ifdef FULL_SYSTEM
+ // Trap will probably need a pointer to the CPU to do accessing.
+ // Or an exec context. --Write ProxyExecContext eventually.
+ // Avoid using this for now as the xc really shouldn't be in here.
+ cpu->trap(fault);
+#else // !FULL_SYSTEM
+ fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
+#endif // FULL_SYSTEM
+ }
+}