diff options
Diffstat (limited to 'cpu/beta_cpu')
55 files changed, 11817 insertions, 0 deletions
diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc new file mode 100644 index 000000000..ef7f23d49 --- /dev/null +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -0,0 +1,132 @@ +#include "base/trace.hh" +#include "cpu/beta_cpu/2bit_local_pred.hh" + +DefaultBP::SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +DefaultBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +DefaultBP::SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +DefaultBP::SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} + +DefaultBP::DefaultBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) + : localPredictorSize(_localPredictorSize), + localCtrBits(_localCtrBits), + instShiftAmt(_instShiftAmt) +{ + // Should do checks here to make sure sizes are correct (powers of 2). + + // Setup the index mask. + indexMask = localPredictorSize - 1; + + DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + + // Setup the array of counters for the local predictor. 
+ localCtrs = new SatCounter[localPredictorSize](localCtrBits); + + DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + localPredictorSize); + + DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + + DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + instShiftAmt); +} + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} + +bool +DefaultBP::lookup(Addr &branch_addr) +{ + bool taken; + uint8_t local_prediction; + unsigned local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + assert(local_predictor_idx < localPredictorSize); + + local_prediction = localCtrs[local_predictor_idx].read(); + + DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + (int)local_prediction); + + taken = getPrediction(local_prediction); + +#if 0 + // Speculative update. + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +#endif + + return taken; +} + +void +DefaultBP::update(Addr &branch_addr, bool taken) +{ + unsigned local_predictor_idx; + + // Update the local predictor. 
+ local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + assert(local_predictor_idx < localPredictorSize); + + // Increment or decrement twice to undo speculative update, then + // properly update + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); +// localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); +// localCtrs[local_predictor_idx].decrement(); + } +} diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh new file mode 100644 index 000000000..32a7972d0 --- /dev/null +++ b/cpu/beta_cpu/2bit_local_pred.hh @@ -0,0 +1,99 @@ +#ifndef __2BIT_LOCAL_PRED_HH__ +#define __2BIT_LOCAL_PRED_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +class DefaultBP +{ + public: + /** + * Default branch predictor constructor. + */ + DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, bool taken); + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned getLocalIndex(Addr &PC); + + /** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. 
+ * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ + class SatCounter + { + public: + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + uint8_t read() + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; + }; + + /** Array of counters that make up the local predictor. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Number of bits to shift the PC when calculating index. */ + unsigned instShiftAmt; + + /** Mask to get index bits. */ + unsigned indexMask; +}; + +#endif // __2BIT_LOCAL_PRED_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc new file mode 100644 index 000000000..1bfcb8420 --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +// Force instantiation of AlphaDynInst for all the implementations that +// are needed. 
+template AlphaDynInst<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh new file mode 100644 index 000000000..584e027d7 --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -0,0 +1,80 @@ +//Todo: + +#ifndef __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ +#define __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ + +#include "cpu/base_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/inst_seq.hh" + +/** + * Mostly implementation specific AlphaDynInst. It is templated in case there + * are other implementations that are similar enough to be able to use this + * class without changes. This is mainly useful if there are multiple similar + * CPU implementations of the same ISA. + */ + +template <class Impl> +class AlphaDynInst : public BaseDynInst<Impl> +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::FullCPU FullCPU; + + /** Typedef to get the ISA. */ + typedef typename Impl::ISA ISA; + + /** Binary machine instruction type. */ + typedef typename ISA::MachInst MachInst; + /** Memory address type. */ + typedef typename ISA::Addr Addr; + /** Logical register index type. */ + typedef typename ISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef typename ISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst); + + /** Executes the instruction. 
*/ + Fault execute() + { + fault = staticInst->execute(this, traceData); + return fault; + } + + public: + uint64_t readUniq(); + void setUniq(uint64_t val); + + uint64_t readFpcr(); + void setFpcr(uint64_t val); + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + +}; + +#endif // __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ + diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh new file mode 100644 index 000000000..8311067db --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -0,0 +1,109 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +template <class Impl> +AlphaDynInst<Impl>::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst<AlphaSimpleImpl>(inst, PC, Pred_PC, seq_num, cpu) +{ +} + +template <class Impl> +AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst) + : BaseDynInst<AlphaSimpleImpl>(_staticInst) +{ +} + +template <class Impl> +uint64_t +AlphaDynInst<Impl>::readUniq() +{ + return cpu->readUniq(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::setUniq(uint64_t val) +{ + cpu->setUniq(val); +} + +template <class Impl> +uint64_t +AlphaDynInst<Impl>::readFpcr() +{ + return cpu->readFpcr(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::setFpcr(uint64_t val) +{ + cpu->setFpcr(val); +} + +#ifdef FULL_SYSTEM +template <class Impl> +uint64_t +AlphaDynInst<Impl>::readIpr(int idx, Fault &fault) +{ + return cpu->readIpr(idx, fault); +} + +template <class Impl> +Fault +AlphaDynInst<Impl>::setIpr(int idx, uint64_t val) +{ + return cpu->setIpr(idx, val); +} + +template <class Impl> +Fault +AlphaDynInst<Impl>::hwrei() +{ + return cpu->hwrei(); +} + +template <class Impl> +int +AlphaDynInst<Impl>::readIntrFlag() +{ +return 
cpu->readIntrFlag(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::setIntrFlag(int val) +{ + cpu->setIntrFlag(val); +} + +template <class Impl> +bool +AlphaDynInst<Impl>::inPalMode() +{ + return cpu->inPalMode(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::trap(Fault fault) +{ + cpu->trap(fault); +} + +template <class Impl> +bool +AlphaDynInst<Impl>::simPalCheck(int palFunc) +{ + return cpu->simPalCheck(palFunc); +} +#else +template <class Impl> +void +AlphaDynInst<Impl>::syscall() +{ + cpu->syscall(); +} +#endif + diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc new file mode 100644 index 000000000..80c4bdec8 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu.cc @@ -0,0 +1,9 @@ + +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +// Force instantiation of AlphaFullCPU for all the implemntations that are +// needed. Consider merging this and alpha_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (alpha_impl.cc?). +template AlphaFullCPU<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh new file mode 100644 index 000000000..92eebc82a --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -0,0 +1,249 @@ +// Todo: Find all the stuff in ExecContext and ev5 that needs to be +// specifically designed for this CPU. +// Read and write are horribly hacked up between not being sure where to +// copy their code from, and Ron's memory changes. 
+ +#ifndef __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ +#define __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ + +// To include: comm, full cpu, ITB/DTB if full sys, +#include "cpu/beta_cpu/full_cpu.hh" + +template <class Impl> +class AlphaFullCPU : public FullBetaCPU<Impl> +{ + public: + typedef typename Impl::ISA AlphaISA; + typedef typename Impl::Params Params; + + public: + AlphaFullCPU(Params ¶ms); + +#ifdef FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif + + public: + void regStats(); + +#ifdef FULL_SYSTEM + bool inPalMode(); + + //Note that the interrupt stuff from the base CPU might be somewhat + //ISA specific (ie NumInterruptLevels). These functions might not + //be needed in FullCPU though. +// void post_interrupt(int int_num, int index); +// void clear_interrupt(int int_num, int index); +// void clear_interrupts(); + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return No_Fault; + } + + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + // Later on may want to remove this misc stuff from the regfile and + // have it handled at this level. Might prove to be an issue when + // trying to rename source/destination registers... 
+ uint64_t readUniq() + { + return regFile.readUniq(); + } + + void setUniq(uint64_t val) + { + regFile.setUniq(val); + } + + uint64_t readFpcr() + { + return regFile.readFpcr(); + } + + void setFpcr(uint64_t val) + { + regFile.setFpcr(val); + } + +#ifdef FULL_SYSTEM + uint64_t *getIPR(); + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + int readIntrFlag(); + void setIntrFlag(int val); + Fault hwrei(); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); + + void processInterrupts(); +#endif + + +#ifndef FULL_SYSTEM + // Need to change these into regfile calls that directly set a certain + // register. Actually, these functions should handle most of this + // functionality by themselves; should look up the rename and then + // set the register. + IntReg getSyscallArg(int i) + { + return xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i]; + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { + xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val; + } + + void setSyscallReturn(int64_t return_value) + { + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + const int RegA3 = 19; // only place this is used + if (return_value >= 0) { + // no error + xc->regs.intRegFile[RegA3] = 0; + xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value; + } else { + // got an error, return details + xc->regs.intRegFile[RegA3] = (IntReg) -1; + xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value; + } + } + + void syscall(); + void squashStages(); + +#endif + + void copyToXC(); + void copyFromXC(); + + public: +#ifdef FULL_SYSTEM + bool palShadowEnabled; + + // Not sure this is used anywhere. + void intr_post(RegFile *regs, Fault fault, Addr pc); + // Actually used within exec files. Implement properly. 
+ void swap_palshadow(RegFile *regs, bool use_shadow); + // Called by CPU constructor. Can implement as I please. + void initCPU(RegFile *regs); + // Called by initCPU. Implement as I please. + void initIPRs(RegFile *regs); +#endif + + + template <class T> + Fault read(MemReqPtr &req, T &data) + { +#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) + if (req->flags & LOCKED) { + MiscRegFile *cregs = &req->xc->regs.miscRegs; + cregs->lock_addr = req->paddr; + cregs->lock_flag = true; + } +#endif + + Fault error; + error = mem->read(req, data); + data = htoa(data); + return error; + } + + + template <class T> + Fault write(MemReqPtr &req, T &data) + { +#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) + + MiscRegFile *cregs; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + cregs = &xc->regs.miscRegs; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + req->xc->storeCondFailures = 0;//Needed? [RGD] + } else { + req->result = cregs->lock_flag; + if (!cregs->lock_flag || + ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) { + cregs->lock_flag = false; + if (((++req->xc->storeCondFailures) % 100000) == 0) { + std::cerr << "Warning: " + << req->xc->storeCondFailures + << " consecutive store conditional failures " + << "on cpu " << cpu_id + << std::endl; + } + return No_Fault; + } + else req->xc->storeCondFailures = 0; + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. 
+ for (int i = 0; i < system->execContexts.size(); i++){ + cregs = &system->execContexts[i]->regs.miscRegs; + if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) { + cregs->lock_flag = false; + } + } + +#endif + + return mem->write(req, (T)htoa(data)); + } + +}; + +#endif // __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc new file mode 100644 index 000000000..f37081232 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -0,0 +1,367 @@ +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" + +#include "mem/cache/base_cache.hh" + +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#ifdef FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "dev/alpha_access.h" +#include "dev/pciareg.h" +#include "mem/functional_mem/memory_control.hh" +#include "mem/functional_mem/physical_memory.hh" +#include "sim/system.hh" +#include "targetarch/alpha_memory.hh" +#include "targetarch/vtophys.hh" +#else // !FULL_SYSTEM +#include "eio/eio.hh" +#include "mem/functional_mem/functional_memory.hh" +#endif // FULL_SYSTEM + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) + + Param<int> numThreads; + +#ifdef FULL_SYSTEM +SimObjectParam<System *> system; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +Param<int> mult; +#else +SimObjectVectorParam<Process *> workload; +SimObjectParam<Process *> process; +Param<short> asid; +#endif // FULL_SYSTEM +SimObjectParam<FunctionalMemory *> mem; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> 
max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +#if 0 +Param<unsigned> localPredictorSize; +Param<unsigned> localPredictorCtrBits; +#endif +Param<unsigned> local_predictor_size; +Param<unsigned> local_ctr_bits; +Param<unsigned> local_history_table_size; +Param<unsigned> local_history_bits; +Param<unsigned> global_predictor_size; +Param<unsigned> global_ctr_bits; +Param<unsigned> global_history_bits; +Param<unsigned> choice_predictor_size; +Param<unsigned> choice_ctr_bits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<unsigned> instShiftAmt; + +Param<bool> defReg; + +END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) + + 
INIT_PARAM(numThreads, "number of HW thread contexts"), + +#ifdef FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), + INIT_PARAM_DFLT(mult, "System clock multiplier", 1), +#else + INIT_PARAM(workload, "Processes to run"), + INIT_PARAM_DFLT(process, "Process to run", NULL), + INIT_PARAM(asid, "Address space ID"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + 
"Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + +#if 0 + INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. " + "Must be a power of 2."), + INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), +#endif + INIT_PARAM(local_predictor_size, "Size of local predictor"), + INIT_PARAM(local_ctr_bits, "Bits per counter"), + INIT_PARAM(local_history_table_size, "Size of local history table"), + INIT_PARAM(local_history_bits, "Bits for the local history"), + INIT_PARAM(global_predictor_size, "Size of global predictor"), + INIT_PARAM(global_ctr_bits, "Bits per counter"), + INIT_PARAM(global_history_bits, "Bits of history"), + INIT_PARAM(choice_predictor_size, "Size of choice predictor"), + INIT_PARAM(choice_ctr_bits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of 
instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + + INIT_PARAM(defReg, "Defer registration") + +END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) + +CREATE_SIM_OBJECT(BaseFullCPU) +{ + AlphaFullCPU<AlphaSimpleImpl> *cpu; + +#ifdef FULL_SYSTEM + if (mult != 1) + panic("Processor clock multiplier must be 1?\n"); + + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + Process *actual_process; + + if (process == NULL) { + actual_process = workload[0]; + } else { + actual_process = process; + } + +#endif + + AlphaSimpleParams params; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#ifdef FULL_SYSTEM + params._system = system; + params.itb = itb; + params.dtb = dtb; + params.freq = ticksPerSecond * mult; +#else + params.workload = workload; + params.process = actual_process; + params.asid = asid; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.maxInstsAnyThread = max_insts_any_thread; + params.maxInstsAllThreads = max_insts_all_threads; + params.maxLoadsAnyThread = max_loads_any_thread; + params.maxLoadsAllThreads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? 
dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + params.executeBranchWidth = executeBranchWidth; + params.executeMemoryWidth = executeMemoryWidth; + + params.iewToCommitDelay = iewToCommitDelay; + params.renameToROBDelay = renameToROBDelay; + params.commitWidth = commitWidth; + params.squashWidth = squashWidth; +#if 0 + params.localPredictorSize = localPredictorSize; + params.localPredictorCtrBits = localPredictorCtrBits; +#endif + params.local_predictor_size = local_predictor_size; + params.local_ctr_bits = local_ctr_bits; + params.local_history_table_size = local_history_table_size; + params.local_history_bits = local_history_bits; + params.global_predictor_size = global_predictor_size; + params.global_ctr_bits = global_ctr_bits; + params.global_history_bits = global_history_bits; + params.choice_predictor_size = choice_predictor_size; + params.choice_ctr_bits = choice_ctr_bits; + + params.BTBEntries = BTBEntries; + params.BTBTagSize = BTBTagSize; + + params.RASSize = RASSize; + + params.LQEntries = LQEntries; + params.SQEntries = 
SQEntries; + params.SSITSize = SSITSize; + params.LFSTSize = LFSTSize; + + params.numPhysIntRegs = numPhysIntRegs; + params.numPhysFloatRegs = numPhysFloatRegs; + params.numIQEntries = numIQEntries; + params.numROBEntries = numROBEntries; + + params.instShiftAmt = 2; + + params.defReg = defReg; + + cpu = new AlphaFullCPU<AlphaSimpleImpl>(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU) + diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh new file mode 100644 index 000000000..611a0d80d --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -0,0 +1,711 @@ + +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_params.hh" +#include "cpu/beta_cpu/comm.hh" + +template <class Impl> +AlphaFullCPU<Impl>::AlphaFullCPU(Params ¶ms) + : FullBetaCPU<AlphaSimpleImpl>(params) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + + fetch.setCPU(this); + decode.setCPU(this); + rename.setCPU(this); + iew.setCPU(this); + commit.setCPU(this); + + rob.setCPU(this); +} + +template <class Impl> +void +AlphaFullCPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + fullCPURegStats(); + fetch.regStats(); + decode.regStats(); + rename.regStats(); + iew.regStats(); + commit.regStats(); +} + +#ifndef FULL_SYSTEM + +template <class Impl> +void +AlphaFullCPU<Impl>::syscall() +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + // Commit stage needs to run as well. + commit.tick(); + + squashStages(); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++funcExeInst; + + // Copy over all important state to xc once all the unrolling is done. 
+ copyToXC(); + + process->syscall(xc); + + // Copy over all important state back to CPU. + copyFromXC(); + + // Decrease funcExeInst by one as the normal commit will handle + // incrememnting it. + --funcExeInst; +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls). Maybe put this at the FullCPU level? +template <class Impl> +void +AlphaFullCPU<Impl>::squashStages() +{ + InstSeqNum rob_head = rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it. + for (int i = 0; i < 5; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + fetch.squash(rob.readHeadNextPC()); + fetchQueue.advance(); + + decode.squash(); + decodeQueue.advance(); + + rename.squash(); + renameQueue.advance(); + renameQueue.advance(); + + // Be sure to advance the IEW queues so that the commit stage doesn't + // try to set an instruction as completed at the same time that it + // might be deleting it. + iew.squash(); + iewQueue.advance(); + iewQueue.advance(); + + rob.squash(rob_head); + commit.setSquashing(); + + // Now hack the time buffer to clear the sequence numbers in the places + // where the stages might read it.? + for (int i = 0; i < 5; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; + } + +} + +#endif // FULL_SYSTEM + +template <class Impl> +void +AlphaFullCPU<Impl>::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + xc->regs.intRegFile[i] = regFile.readIntReg(renamed_reg); + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. 
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + xc->regs.floatRegFile.d[i] = regFile.readFloatRegDouble(renamed_reg); + xc->regs.floatRegFile.q[i] = regFile.readFloatRegInt(renamed_reg); + } + + xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; + xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; + xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; + xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + + xc->regs.pc = rob.readHeadPC(); + xc->regs.npc = xc->regs.pc+4; + + xc->func_exe_inst = funcExeInst; +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +template <class Impl> +void +AlphaFullCPU<Impl>::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg], + xc->regs.intRegFile[i]); + + regFile.setIntReg(renamed_reg, xc->regs.intRegFile[i]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + regFile.setFloatRegDouble(renamed_reg, xc->regs.floatRegFile.d[i]); + regFile.setFloatRegInt(renamed_reg, xc->regs.floatRegFile.q[i]); + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. 
+// regFile.pc = xc->regs.pc; +// regFile.npc = xc->regs.npc; + + funcExeInst = xc->func_exe_inst; +} + +#ifdef FULL_SYSTEM + +template <class Impl> +uint64_t * +AlphaFullCPU<Impl>::getIpr() +{ + return regFile.getIpr(); +} + +template <class Impl> +uint64_t +AlphaFullCPU<Impl>::readIpr(int idx, Fault &fault) +{ + uint64_t *ipr = getIpr(); + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PALtemp23: + case AlphaISA::IPR_PAL_BASE: + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + case AlphaISA::IPR_ISR: + case AlphaISA::IPR_EXC_ADDR: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_MCSR: + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + case AlphaISA::IPR_SIRR: + case AlphaISA::IPR_ICSR: + case AlphaISA::IPR_ICM: + case AlphaISA::IPR_DTB_CM: + case AlphaISA::IPR_IPLR: + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case AlphaISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case AlphaISA::IPR_VA: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_VA_FORM: + case AlphaISA::IPR_MM_STAT: + 
case AlphaISA::IPR_IFAULT_VA_FORM: + case AlphaISA::IPR_EXC_MASK: + case AlphaISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_DTB_PTE: + { + AlphaISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + case AlphaISA::IPR_ALT_MODE: + case AlphaISA::IPR_DTB_IA: + case AlphaISA::IPR_DTB_IAP: + case AlphaISA::IPR_ITB_IA: + case AlphaISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +template <class Impl> +Fault +AlphaFullCPU<Impl>::setIpr(int idx, uint64_t val) +{ + uint64_t *ipr = getIpr(); + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PAL_BASE: + 
case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case AlphaISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case AlphaISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). + ipr[idx] = val; + break; + + case AlphaISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case AlphaISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case AlphaISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case AlphaISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case AlphaISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + + case AlphaISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_MCSR: + // more here after optimization... 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case AlphaISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case AlphaISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case AlphaISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case AlphaISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case AlphaISA::IPR_EXC_SUM: + case AlphaISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_SL_RCV: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_ITB_PTE_TEMP: + case AlphaISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case AlphaISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case AlphaISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case AlphaISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); + break; + + case AlphaISA::IPR_DTB_TAG: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case AlphaISA::IPR_ITB_PTE: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); + } + break; + + case AlphaISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case AlphaISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case AlphaISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... + return No_Fault; + +} + +template <class Impl> +int +AlphaFullCPU<Impl>::readIntrFlag() +{ + return regs.intrflag; +} + +template <class Impl> +void +AlphaFullCPU<Impl>::setIntrFlag(int val) +{ + regs.intrflag = val; +} + +// Can force commit stage to squash and stuff. 
+template <class Impl> +Fault +AlphaFullCPU<Impl>::hwrei() +{ + uint64_t *ipr = getIpr(); + + if (!PC_PAL(regs.pc)) + return Unimplemented_Opcode_Fault; + + setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); + + if (!misspeculating()) { + kernelStats.hwrei(); + + if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) + AlphaISA::swap_palshadow(®s, false); + + AlphaISA::check_interrupts = true; + } + + // FIXME: XXX check for interrupts? XXX + return No_Fault; +} + +template <class Impl> +bool +AlphaFullCPU<Impl>::inPalMode() +{ + return PC_PAL(readPC()); +} + +template <class Impl> +bool +AlphaFullCPU<Impl>::simPalCheck(int palFunc) +{ + kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +template <class Impl> +void +AlphaFullCPU<Impl>::trap(Fault fault) +{ + uint64_t PC = commit.readPC(); + + DPRINTF(Fault, "Fault %s\n", FaultName(fault)); + Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); + + assert(!misspeculating()); + kernelStats.fault(fault); + + if (fault == Arithmetic_Fault) + panic("Arithmetic traps are unimplemented!"); + + AlphaISA::InternalProcReg *ipr = getIpr(); + + // exception restart address - Get the commit PC + if (fault != Interrupt_Fault || !PC_PAL(PC)) + ipr[AlphaISA::IPR_EXC_ADDR] = PC; + + if (fault == Pal_Fault || fault == Arithmetic_Fault /* || + fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { + // traps... 
skip faulting instruction + ipr[AlphaISA::IPR_EXC_ADDR] += 4; + } + + if (!PC_PAL(PC)) + AlphaISA::swap_palshadow(®s, true); + + setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); + setNextPC(PC + sizeof(MachInst)); +} + +template <class Impl> +void +AlphaFullCPU<Impl>::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +template <class Impl> +void +AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. + for (int i = 0; i < AlphaISA::NumIntRegs; i++) { + if (reg_redir[i]) { + AlphaISA::IntReg temp = regs->intRegFile[i]; + regs->intRegFile[i] = regs->palregs[i]; + regs->palregs[i] = temp; + } + } +} + +#endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh new file mode 100644 index 000000000..81a1aba9b --- /dev/null +++ b/cpu/beta_cpu/alpha_impl.hh @@ -0,0 +1,54 @@ +#ifndef __CPU_BETA_CPU_ALPHA_IMPL_HH__ +#define __CPU_BETA_CPU_ALPHA_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +#include "cpu/beta_cpu/cpu_policy.hh" +#include "cpu/beta_cpu/alpha_params.hh" + +// Forward declarations. +template <class Impl> +class AlphaDynInst; + +template <class Impl> +class AlphaFullCPU; + +/** Implementation specific struct that defines several key things to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific FullCPU, and all of the structs from the time buffers to do + * communication. 
+ * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct AlphaSimpleImpl +{ + /** The ISA to be used. */ + typedef AlphaISA ISA; + + /** The type of MachInst. */ + typedef ISA::MachInst MachInst; + + /** The CPU policy to be used (ie fetch, decode, etc.). */ + typedef SimpleCPUPolicy<AlphaSimpleImpl> CPUPol; + + /** The DynInst to be used. */ + typedef AlphaDynInst<AlphaSimpleImpl> DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr<DynInst> DynInstPtr; + + /** The FullCPU to be used. */ + typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU; + + /** The Params to be passed to each stage. */ + typedef AlphaSimpleParams Params; + + enum { + MaxWidth = 8 + }; +}; + +#endif // __CPU_BETA_CPU_ALPHA_IMPL_HH__ diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh new file mode 100644 index 000000000..ecde4b016 --- /dev/null +++ b/cpu/beta_cpu/alpha_params.hh @@ -0,0 +1,135 @@ +#ifndef __ALPHA_SIMPLE_PARAMS_HH__ +#define __ALPHA_SIMPLE_PARAMS_HH__ + +#include "cpu/beta_cpu/full_cpu.hh" + +//Forward declarations +class System; +class AlphaITB; +class AlphaDTB; +class FunctionalMemory; +class Process; +class MemInterface; + +/** + * This file defines the parameters that will be used for the AlphaFullCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. 
+ */ + +class AlphaSimpleParams : public BaseFullCPU::Params +{ + public: +#ifdef FULL_SYSTEM + AlphaITB *itb; AlphaDTB *dtb; +#else + std::vector<Process *> workload; + Process *process; + short asid; +#endif // FULL_SYSTEM + + FunctionalMemory *mem; + + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + // + // Fetch + // + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + // + // Decode + // + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + // + // Rename + // + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + // + // IEW + // + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; + + // + // Commit + // + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; + + // + // Branch predictor (BP & BTB) + // +/* + unsigned localPredictorSize; + unsigned localPredictorCtrBits; +*/ + + unsigned local_predictor_size; + unsigned local_ctr_bits; + unsigned local_history_table_size; + unsigned local_history_bits; + unsigned global_predictor_size; + unsigned global_ctr_bits; + unsigned global_history_bits; + unsigned choice_predictor_size; + unsigned choice_ctr_bits; + + unsigned BTBEntries; + unsigned BTBTagSize; + + unsigned RASSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Memory dependence + // + unsigned SSITSize; + unsigned LFSTSize; + + // + // Miscellaneous + // + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + 
+ // Probably can get this from somewhere. + unsigned instShiftAmt; + + bool defReg; +}; + +#endif diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc new file mode 100644 index 000000000..c4a79fbbe --- /dev/null +++ b/cpu/beta_cpu/bpred_unit.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/bpred_unit_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +template TwobitBPredUnit<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh new file mode 100644 index 000000000..53c7146c5 --- /dev/null +++ b/cpu/beta_cpu/bpred_unit.hh @@ -0,0 +1,109 @@ + +#ifndef __BPRED_UNIT_HH__ +#define __BPRED_UNIT_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + +#include "cpu/beta_cpu/2bit_local_pred.hh" +#include "cpu/beta_cpu/tournament_pred.hh" +#include "cpu/beta_cpu/btb.hh" +#include "cpu/beta_cpu/ras.hh" + +#include <list> + +/** + * Basically a wrapper class to hold both the branch predictor + * and the BTB. Right now I'm unsure of the implementation; it would + * be nicer to have something closer to the CPUPolicy or the Impl where + * this is just typedefs, but it forces the upper level stages to be + * aware of the constructors of the BP and the BTB. The nicer thing + * to do is have this templated on the Impl, accept the usual Params + * object, and be able to call the constructors on the BP and BTB. 
+ */ +template<class Impl> +class TwobitBPredUnit +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + TwobitBPredUnit(Params ¶ms); + + void regStats(); + + bool predict(DynInstPtr &inst, Addr &PC); + + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken); + + void squash(const InstSeqNum &squashed_sn); + + void update(const InstSeqNum &done_sn); + + bool BPLookup(Addr &inst_PC) + { return BP.lookup(inst_PC); } + + unsigned BPReadGlobalHist() + { return 0; } + + bool BTBValid(Addr &inst_PC) + { return BTB.valid(inst_PC); } + + Addr BTBLookup(Addr &inst_PC) + { return BTB.lookup(inst_PC); } + + // Will want to include global history. + void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken) + { BP.update(inst_PC, taken); } + + void BTBUpdate(Addr &inst_PC, Addr &target_PC) + { BTB.update(inst_PC, target_PC); } + + private: + struct PredictorHistory { + PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, + const bool pred_taken) + : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken), + globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0), + RASTarget(0) + { } + + InstSeqNum seqNum; + + Addr PC; + + bool predTaken; + + unsigned globalHistory; + + bool usedRAS; + + bool wasCall; + + unsigned RASIndex; + + Addr RASTarget; + }; + + std::list<PredictorHistory> predHist; + + DefaultBP BP; + + DefaultBTB BTB; + + ReturnAddrStack RAS; + + Stats::Scalar<> lookups; + Stats::Scalar<> condPredicted; + Stats::Scalar<> condIncorrect; + Stats::Scalar<> BTBLookups; + Stats::Scalar<> BTBHits; + Stats::Scalar<> BTBCorrect; + Stats::Scalar<> usedRAS; + Stats::Scalar<> RASIncorrect; +}; + +#endif // __BPRED_UNIT_HH__ diff --git a/cpu/beta_cpu/bpred_unit_impl.hh b/cpu/beta_cpu/bpred_unit_impl.hh new file mode 100644 index 000000000..02c613d34 --- /dev/null +++ b/cpu/beta_cpu/bpred_unit_impl.hh @@ -0,0 +1,247 @@ + +#include "cpu/beta_cpu/bpred_unit.hh" +#include 
"base/traceflags.hh" +#include "base/trace.hh" + +template<class Impl> +TwobitBPredUnit<Impl>::TwobitBPredUnit(Params ¶ms) + : BP(params.local_predictor_size, + params.local_ctr_bits, + params.instShiftAmt), + BTB(params.BTBEntries, + params.BTBTagSize, + params.instShiftAmt), + RAS(params.RASSize) +{ +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::regStats() +{ + lookups + .name(name() + ".BPredUnit.lookups") + .desc("Number of BP lookups") + ; + + condPredicted + .name(name() + ".BPredUnit.condPredicted") + .desc("Number of conditional branches predicted") + ; + + condIncorrect + .name(name() + ".BPredUnit.condIncorrect") + .desc("Number of conditional branches incorrect") + ; + + BTBLookups + .name(name() + ".BPredUnit.BTBLookups") + .desc("Number of BTB lookups") + ; + + BTBHits + .name(name() + ".BPredUnit.BTBHits") + .desc("Number of BTB hits") + ; + + BTBCorrect + .name(name() + ".BPredUnit.BTBCorrect") + .desc("Number of correct BTB predictions (this stat may not " + "work properly.") + ; + + usedRAS + .name(name() + ".BPredUnit.usedRAS") + .desc("Number of times the RAS was used.") + ; + + RASIncorrect + .name(name() + ".BPredUnit.RASInCorrect") + .desc("Number of incorrect RAS predictions.") + ; +} + +template <class Impl> +bool +TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Once that's done, speculatively update the predictor? + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. 
+ + bool pred_taken = false; + Addr target; + + ++lookups; + + if (inst->isUncondCtrl()) { + DPRINTF(Fetch, "BranchPred: Unconditional control.\n"); + pred_taken = true; + } else { + ++condPredicted; + + pred_taken = BPLookup(PC); + + DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x" + "\n", pred_taken, inst->readPC()); + } + + PredictorHistory predict_record(inst->seqNum, PC, pred_taken); + + // Now lookup in the BTB or RAS. + if (pred_taken) { + if (inst->isReturn()) { + ++usedRAS; + + // If it's a function return call, then look up the address + // in the RAS. + target = RAS.top(); + + // Record the top entry of the RAS, and its index. + predict_record.usedRAS = true; + predict_record.RASIndex = RAS.topIdx(); + predict_record.RASTarget = target; + + RAS.pop(); + + DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS " + "predicted target: %#x, RAS index: %i.\n", + inst->readPC(), target, predict_record.RASIndex); + } else { + ++BTBLookups; + + if (inst->isCall()) { + RAS.push(PC+sizeof(MachInst)); + + // Record that it was a call so that the top RAS entry can + // be popped off if the speculation is incorrect. + predict_record.wasCall = true; + + DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, " + "adding %#x to the RAS.\n", + inst->readPC(), PC+sizeof(MachInst)); + } + + if (BTB.valid(PC)) { + ++BTBHits; + + //If it's anything else, use the BTB to get the target addr. + target = BTB.lookup(PC); + + DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target " + "is %#x.\n", inst->readPC(), target); + + } else { + DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry." + "\n"); + pred_taken = false; + } + + } + } + + if (pred_taken) { + // Set the PC and the instruction's predicted target. 
+ PC = target; + inst->setPredTarg(target); + } else { + PC = PC + sizeof(MachInst); + inst->setPredTarg(PC); + } + + predHist.push_front(predict_record); + + assert(!predHist.empty()); + + return pred_taken; +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn) +{ + DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number " + "%i.\n", done_sn); + + while (!predHist.empty() && predHist.back().seqNum <= done_sn) { + assert(!predHist.empty()); + + // Update the branch predictor with the correct results of branches. + BP.update(predHist.back().PC, predHist.back().predTaken); + + predHist.pop_back(); + } +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn) +{ + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, + const bool actually_taken) +{ + // Now that we know that a branch was mispredicted, we need to undo + // all the branches that have been seen up until this branch and + // fix up everything. 
+ + ++condIncorrect; + + DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, " + "setting target to %#x.\n", + squashed_sn, corr_target); + + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } + + predHist.front().predTaken = actually_taken; + + if (predHist.front().usedRAS) { + ++RASIncorrect; + } + + BP.update(predHist.front().PC, actually_taken); + + BTB.update(predHist.front().PC, corr_target); +} diff --git a/cpu/beta_cpu/btb.cc b/cpu/beta_cpu/btb.cc new file mode 100644 index 000000000..bceaa66d1 --- /dev/null +++ b/cpu/beta_cpu/btb.cc @@ -0,0 +1,91 @@ +#include <math.h> + +#include "cpu/beta_cpu/btb.hh" +#include "base/trace.hh" + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _tagBits, + unsigned _instShiftAmt) + : numEntries(_numEntries), + tagBits(_tagBits), + instShiftAmt(_instShiftAmt) +{ + // @todo Check to make sure num_entries is valid (a power of 2) + + DPRINTF(Fetch, "BTB: Creating BTB object.\n"); + + btb = new BTBEntry[numEntries]; + + for (int i = 0; i < numEntries; ++i) + { + btb[i].valid = false; + } + + idxMask = numEntries - 1; + + tagMask = (1 << tagBits) - 1; + + tagShiftAmt = instShiftAmt + (int)log2(numEntries); +} + +inline +unsigned +DefaultBTB::getIndex(const Addr &inst_PC) +{ + // Need to shift PC over by the word offset. 
+ return (inst_PC >> instShiftAmt) & idxMask; +} + +inline +Addr +DefaultBTB::getTag(const Addr &inst_PC) +{ + return (inst_PC >> tagShiftAmt) & tagMask; +} + +bool +DefaultBTB::valid(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return true; + } else { + return false; + } +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr +DefaultBTB::lookup(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return btb[btb_idx].target; + } else { + return 0; + } +} + +void +DefaultBTB::update(const Addr &inst_PC, const Addr &target) +{ + unsigned btb_idx = getIndex(inst_PC); + + assert(btb_idx < numEntries); + + btb[btb_idx].valid = true; + btb[btb_idx].target = target; + btb[btb_idx].tag = getTag(inst_PC); +} diff --git a/cpu/beta_cpu/btb.hh b/cpu/beta_cpu/btb.hh new file mode 100644 index 000000000..81069eabe --- /dev/null +++ b/cpu/beta_cpu/btb.hh @@ -0,0 +1,52 @@ +#ifndef __BTB_HH__ +#define __BTB_HH__ + +// For Addr type. 
+#include "arch/alpha/isa_traits.hh" + +class DefaultBTB +{ + private: + struct BTBEntry + { + BTBEntry() + : tag(0), target(0), valid(false) + { + } + + Addr tag; + Addr target; + bool valid; + }; + + public: + DefaultBTB(unsigned numEntries, unsigned tagBits, + unsigned instShiftAmt); + + Addr lookup(const Addr &inst_PC); + + bool valid(const Addr &inst_PC); + + void update(const Addr &inst_PC, const Addr &target_PC); + + private: + inline unsigned getIndex(const Addr &inst_PC); + + inline Addr getTag(const Addr &inst_PC); + + BTBEntry *btb; + + unsigned numEntries; + + unsigned idxMask; + + unsigned tagBits; + + unsigned tagMask; + + unsigned instShiftAmt; + + unsigned tagShiftAmt; +}; + +#endif // __BTB_HH__ diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh new file mode 100644 index 000000000..c0afe3d1b --- /dev/null +++ b/cpu/beta_cpu/comm.hh @@ -0,0 +1,143 @@ +#ifndef __CPU_BETA_CPU_COMM_HH__ +#define __CPU_BETA_CPU_COMM_HH__ + +#include <stdint.h> +#include <vector> +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +// Find better place to put this typedef. +// The impl might be the best place for this. 
+typedef short int PhysRegIndex; + +template<class Impl> +struct SimpleFetchSimpleDecode { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleDecodeSimpleRename { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleRenameSimpleIEW { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleIEWSimpleCommit { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; + + bool squash; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + unsigned globalHist; + InstSeqNum squashedSeqNum; +}; + +template<class Impl> +struct IssueStruct { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +struct TimeBufStruct { + struct decodeComm { + bool squash; + bool stall; + bool predIncorrect; + uint64_t branchAddr; + + InstSeqNum doneSeqNum; + + // Might want to package this kind of branch stuff into a single + // struct as it is used pretty frequently. + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + unsigned globalHist; + }; + + decodeComm decodeInfo; + + // Rename can't actually tell anything to squash or send a new PC back + // because it doesn't do anything along those lines. But maybe leave + // these fields in here to keep the stages mostly orthagonal. + struct renameComm { + bool squash; + bool stall; + + uint64_t nextPC; + }; + + renameComm renameInfo; + + struct iewComm { + bool stall; + + // Also eventually include skid buffer space. 
+ unsigned freeIQEntries; + }; + + iewComm iewInfo; + + struct commitComm { + bool squash; + bool stall; + unsigned freeROBEntries; + + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + unsigned globalHist; + + // Think of better names here. + // Will need to be a variety of sizes... + // Maybe make it a vector, that way only need one object. + std::vector<PhysRegIndex> freeRegs; + + bool robSquashing; + + // Represents the instruction that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed sequence number. + InstSeqNum doneSeqNum; + + // Extra bits of information so that the LDSTQ only updates when it + // needs to. + bool commitIsStore; + bool commitIsLoad; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. + InstSeqNum nonSpecSeqNum; + }; + + commitComm commitInfo; +}; + +#endif //__CPU_BETA_CPU_COMM_HH__ diff --git a/cpu/beta_cpu/commit.cc b/cpu/beta_cpu/commit.cc new file mode 100644 index 000000000..9e8fa2781 --- /dev/null +++ b/cpu/beta_cpu/commit.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/commit_impl.hh" + +template class SimpleCommit<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh new file mode 100644 index 000000000..731307bf7 --- /dev/null +++ b/cpu/beta_cpu/commit.hh @@ -0,0 +1,157 @@ +// Todo: Maybe have a special method for handling interrupts/traps. +// +// Traps: Have IEW send a signal to commit saying that there's a trap to +// be handled. Have commit send the PC back to the fetch stage, along +// with the current commit PC. Fetch will directly access the IPR and save +// off all the proper stuff. Commit can send out a squash, or something +// close to it. +// Do the same for hwrei(). However, requires that commit be specifically +// built to support that kind of stuff. 
Probably not horrible to have +// commit support having the CPU tell it to squash the other stages and +// restart at a given address. The IPR register does become an issue. +// Probably not a big deal if the IPR stuff isn't cycle accurate. Can just +// have the original function handle writing to the IPR register. + +#ifndef __CPU_BETA_CPU_SIMPLE_COMMIT_HH__ +#define __CPU_BETA_CPU_SIMPLE_COMMIT_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "mem/memory_interface.hh" + +template<class Impl> +class SimpleCommit +{ + public: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + typedef typename CPUPol::ROB ROB; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + + public: + // I don't believe commit can block, so it will only have two + // statuses for now. + // Actually if there's a cache access that needs to block (ie + // uncachable load or just a mem access in commit) then the stage + // may have to wait. + enum Status { + Running, + Idle, + ROBSquashing, + DcacheMissStall, + DcacheMissComplete + }; + + private: + Status _status; + + public: + SimpleCommit(Params &params); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); + + void setROB(ROB *rob_ptr); + + void tick(); + + void commit(); + + uint64_t readCommitPC(); + + void setSquashing() { _status = ROBSquashing; } + + private: + + void commitInsts(); + + bool commitHead(DynInstPtr &head_inst, unsigned inst_num); + + void getInsts(); + + void markCompletedInsts(); + + /** Time buffer interface. 
*/ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer<TimeStruct>::wire toIEW; + + /** Wire to read information from IEW (for ROB). */ + typename TimeBuffer<TimeStruct>::wire robInfoFromIEW; + + /** IEW instruction queue interface. */ + TimeBuffer<IEWStruct> *iewQueue; + + /** Wire to read information from IEW queue. */ + typename TimeBuffer<IEWStruct>::wire fromIEW; + + /** Rename instruction queue interface, for ROB. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to read information from rename queue. */ + typename TimeBuffer<RenameStruct>::wire fromRename; + + /** ROB interface. */ + ROB *rob; + + /** Pointer to FullCPU. */ + FullCPU *cpu; + + /** Pointer to the rename map. DO NOT USE if possible. */ +// typename Impl::CPUPol::RenameMap *renameMap; + + //Store buffer interface? Will need to move committed stores to the + //store buffer + + /** Memory interface. Used for d-cache accesses. */ + MemInterface *dcacheInterface; + + private: + /** IEW to Commit delay, in ticks. */ + unsigned iewToCommitDelay; + + /** Rename to ROB delay, in ticks. */ + unsigned renameToROBDelay; + + /** Rename width, in instructions. Used so ROB knows how many + * instructions to get from the rename instruction queue. + */ + unsigned renameWidth; + + /** IEW width, in instructions. Used so ROB knows how many + * instructions to get from the IEW instruction queue. + */ + unsigned iewWidth; + + /** Commit width, in instructions. 
*/ + unsigned commitWidth; + + Stats::Scalar<> commitCommittedInsts; + Stats::Scalar<> commitSquashedInsts; + Stats::Scalar<> commitSquashEvents; + Stats::Scalar<> commitNonSpecStalls; + Stats::Scalar<> commitCommittedBranches; + Stats::Scalar<> commitCommittedLoads; + Stats::Scalar<> commitCommittedMemRefs; + Stats::Scalar<> branchMispredicts; + + Stats::Distribution<> n_committed_dist; +}; + +#endif // __CPU_BETA_CPU_SIMPLE_COMMIT_HH__ diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh new file mode 100644 index 000000000..3e97b980c --- /dev/null +++ b/cpu/beta_cpu/commit_impl.hh @@ -0,0 +1,509 @@ +// @todo: Bug when something reaches execute, and mispredicts, but is never +// put into the ROB because the ROB is full. Need rename stage to predict +// the free ROB entries better. + +#ifndef __COMMIT_IMPL_HH__ +#define __COMMIT_IMPL_HH__ + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/commit.hh" +#include "cpu/exetrace.hh" + +template <class Impl> +SimpleCommit<Impl>::SimpleCommit(Params &params) + : dcacheInterface(params.dcacheInterface), + iewToCommitDelay(params.iewToCommitDelay), + renameToROBDelay(params.renameToROBDelay), + renameWidth(params.renameWidth), + iewWidth(params.executeWidth), + commitWidth(params.commitWidth) +{ + _status = Idle; +} + +template <class Impl> +void +SimpleCommit<Impl>::regStats() +{ + commitCommittedInsts + .name(name() + ".commitCommittedInsts") + .desc("The number of committed instructions") + .prereq(commitCommittedInsts); + commitSquashedInsts + .name(name() + ".commitSquashedInsts") + .desc("The number of squashed insts skipped by commit") + .prereq(commitSquashedInsts); + commitSquashEvents + .name(name() + ".commitSquashEvents") + .desc("The number of times commit is told to squash") + .prereq(commitSquashEvents); + commitNonSpecStalls + .name(name() + ".commitNonSpecStalls") + .desc("The number of times commit has been forced to stall to " + "communicate backwards") + .prereq(commitNonSpecStalls); + 
commitCommittedBranches + .name(name() + ".commitCommittedBranches") + .desc("The number of committed branches") + .prereq(commitCommittedBranches); + commitCommittedLoads + .name(name() + ".commitCommittedLoads") + .desc("The number of committed loads") + .prereq(commitCommittedLoads); + commitCommittedMemRefs + .name(name() + ".commitCommittedMemRefs") + .desc("The number of committed memory references") + .prereq(commitCommittedMemRefs); + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("The number of times a branch was mispredicted") + .prereq(branchMispredicts); + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(Stats::pdf) + ; +} + +template <class Impl> +void +SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template <class Impl> +void +SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Commit, "Commit: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to send information back to IEW. + toIEW = timeBuffer->getWire(0); + + // Setup wire to read data from IEW (for the ROB). + robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(Commit, "Commit: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to get instructions from rename (for the ROB). + fromRename = renameQueue->getWire(-renameToROBDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) +{ + DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to get instructions from IEW. 
+ fromIEW = iewQueue->getWire(-iewToCommitDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setROB(ROB *rob_ptr) +{ + DPRINTF(Commit, "Commit: Setting ROB pointer.\n"); + rob = rob_ptr; +} + +template <class Impl> +void +SimpleCommit<Impl>::tick() +{ + // If the ROB is currently in its squash sequence, then continue + // to squash. In this case, commit does not do anything. Otherwise + // run commit. + if (_status == ROBSquashing) { + if (rob->isDoneSquashing()) { + _status = Running; + } else { + rob->doSquash(); + + // Send back sequence number of tail of ROB, so other stages + // can squash younger instructions. Note that really the only + // stage that this is important for is the IEW stage; other + // stages can just clear all their state as long as selective + // replay isn't used. + toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum(); + toIEW->commitInfo.robSquashing = true; + } + } else { + commit(); + } + + markCompletedInsts(); + + // Writeback number of free ROB entries here. + DPRINTF(Commit, "Commit: ROB has %d free entries.\n", + rob->numFreeEntries()); + toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); +} + +template <class Impl> +void +SimpleCommit<Impl>::commit() +{ + ////////////////////////////////////// + // Check for interrupts + ////////////////////////////////////// + + // Process interrupts if interrupts are enabled and not in PAL mode. + // Take the PC from commit and write it to the IPR, then squash. The + // interrupt completing will take care of restoring the PC from that value + // in the IPR. Look at IPR[EXC_ADDR]; + // hwrei() is what resets the PC to the place where instruction execution + // beings again. +#ifdef FULL_SYSTEM + if (ISA::check_interrupts && + cpu->check_interrupts() && + !xc->inPalMode()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. 
The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + } +#endif // FULL_SYSTEM + + //////////////////////////////////// + // Check for squash signal, handle that first + //////////////////////////////////// + + // Want to mainly check if the IEW stage is telling the ROB to squash. + // Should I also check if the commit stage is telling the ROB to squah? + // This might be necessary to keep the same timing between the IQ and + // the ROB... + if (fromIEW->squash) { + DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n"); + + _status = ROBSquashing; + + InstSeqNum squashed_inst = fromIEW->squashedSeqNum; + + rob->squash(squashed_inst); + + // Send back the sequence number of the squashed instruction. + toIEW->commitInfo.doneSeqNum = squashed_inst; + + // Send back the squash signal to tell stages that they should squash. + toIEW->commitInfo.squash = true; + + // Send back the rob squashing signal so other stages know that the + // ROB is in the process of squashing. + toIEW->commitInfo.robSquashing = true; + + toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict; + + toIEW->commitInfo.branchTaken = fromIEW->branchTaken; + + toIEW->commitInfo.nextPC = fromIEW->nextPC; + + toIEW->commitInfo.mispredPC = fromIEW->mispredPC; + + toIEW->commitInfo.globalHist = fromIEW->globalHist; + + if (toIEW->commitInfo.branchMispredict) { + ++branchMispredicts; + } + } + + if (_status != ROBSquashing) { + // If we're not currently squashing, then get instructions. + getInsts(); + + // Try to commit any instructions. + commitInsts(); + } + + // If the ROB is empty, we can set this stage to idle. Use this + // in the future when the Idle status will actually be utilized. +#if 0 + if (rob->isEmpty()) { + DPRINTF(Commit, "Commit: ROB is empty. 
Status changed to idle.\n"); + _status = Idle; + // Schedule an event so that commit will actually wake up + // once something gets put in the ROB. + } +#endif +} + +// Loop that goes through as many instructions in the ROB as possible and +// tries to commit them. The actual work for committing is done by the +// commitHead() function. +template <class Impl> +void +SimpleCommit<Impl>::commitInsts() +{ + //////////////////////////////////// + // Handle commit + // Note that commit will be handled prior to the ROB so that the ROB + // only tries to commit instructions it has in this current cycle, and + // not instructions it is writing in during this cycle. + // Can't commit and squash things at the same time... + //////////////////////////////////// + + DynInstPtr head_inst = rob->readHeadInst(); + + unsigned num_committed = 0; + + // Commit as many instructions as possible until the commit bandwidth + // limit is reached, or it becomes impossible to commit any more. + while (!rob->isEmpty() && + head_inst->readyToCommit() && + num_committed < commitWidth) + { + DPRINTF(Commit, "Commit: Trying to commit head instruction.\n"); + + // If the head instruction is squashed, it is ready to retire at any + // time. However, we need to avoid updating any other state + // incorrectly if it's already been squashed. + if (head_inst->isSquashed()) { + // Hack to avoid the instruction being retired (and deleted) if + // it hasn't been through the IEW stage yet. + if (!head_inst->isExecuted()) { + break; + } + + DPRINTF(Commit, "Commit: Retiring squashed instruction from " + "ROB.\n"); + + // Tell ROB to retire head instruction. This retires the head + // inst in the ROB without affecting any other stages. + rob->retireHead(); + + ++commitSquashedInsts; + + } else { + // Increment the total number of non-speculative instructions + // executed. 
+ // Hack for now: it really shouldn't happen until after the + // commit is deemed to be successful, but this count is needed + // for syscalls. + cpu->funcExeInst++; + + // Try to commit the head instruction. + bool commit_success = commitHead(head_inst, num_committed); + + // Update what instruction we are looking at if the commit worked. + if (commit_success) { + ++num_committed; + + // Send back which instruction has been committed. + // @todo: Update this later when a wider pipeline is used. + // Hmm, can't really give a pointer here...perhaps the + // sequence number instead (copy). + toIEW->commitInfo.doneSeqNum = head_inst->seqNum; + + ++commitCommittedInsts; + + if (!head_inst->isNop()) { + cpu->instDone(); + } + } else { + break; + } + } + + // Update the pointer to read the next instruction in the ROB. + head_inst = rob->readHeadInst(); + } + + DPRINTF(CommitRate, "%i\n", num_committed); + n_committed_dist.sample(num_committed); +} + +template <class Impl> +bool +SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) +{ + // Make sure instruction is valid + assert(head_inst); + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!head_inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. + cpu->funcExeInst--; + + if (head_inst->isStore() || head_inst->isNonSpeculative()) { + DPRINTF(Commit, "Commit: Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + head_inst->readPC()); + + toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. 
+ head_inst->clearCanCommit(); + + ++commitNonSpecStalls; + + return false; + } else { + panic("Commit: Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (head_inst->isThreadSync() || + head_inst->isSerializing() || + head_inst->isMemBarrier() || + head_inst->isWriteBarrier() ) + { + // Not handled for now. Mem barriers and write barriers are safe + // to simply let commit as memory accesses only happen once they + // reach the head of commit. Not sure about the other two. + panic("Serializing or barrier instructions" + " are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + if (head_inst->getFault() != No_Fault) { + if (!head_inst->isNop()) { +#ifdef FULL_SYSTEM + cpu->trap(head_inst->getFault()); +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", head_inst->getFault(), + head_inst->PC); +#endif // FULL_SYSTEM + } + } + + // Check if we're really ready to commit. If not then return false. + // I'm pretty sure all instructions should be able to commit if they've + // reached this far. For now leave this in as a check. + if(!rob->isHeadReady()) { + panic("Commit: Unable to commit head instruction!\n"); + return false; + } + + // If it's a branch, then send back branch prediction update info + // to the fetch stage. + // This should be handled in the iew stage if a mispredict happens... + + if (head_inst->isControl()) { + +#if 0 + toIEW->nextPC = head_inst->readPC(); + //Maybe switch over to BTB incorrect. + toIEW->btbMissed = head_inst->btbMiss(); + toIEW->target = head_inst->nextPC; + //Maybe also include global history information. + //This simple version will have no branch prediction however. +#endif + + ++commitCommittedBranches; + } + + +#if 0 + // Check if the instruction has a destination register. 
+ // If so add the previous physical register of its logical register's + // destination to the free list through the time buffer. + for (int i = 0; i < head_inst->numDestRegs(); i++) + { + toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i)); + } +#endif + + // Explicit communication back to the LDSTQ that a load has been committed + // and can be removed from the LDSTQ. Stores don't need this because + // the LDSTQ will already have been told that a store has reached the head + // of the ROB. Consider including communication if it's a store as well + // to keep things orthogonal. + if (head_inst->isMemRef()) { + ++commitCommittedMemRefs; + if (head_inst->isLoad()) { + toIEW->commitInfo.commitIsLoad = true; + ++commitCommittedLoads; + } + } + + // Now that the instruction is going to be committed, finalize its + // trace data. + if (head_inst->traceData) { + head_inst->traceData->finalize(); + } + + //Finally clear the head ROB entry. + rob->retireHead(); + + // Return true to indicate that we have committed an instruction. + return true; +} + +template <class Impl> +void +SimpleCommit<Impl>::getInsts() +{ + ////////////////////////////////////// + // Handle ROB functions + ////////////////////////////////////// + + // Read any issued instructions and place them into the ROB. Do this + // prior to squashing to avoid having instructions in the ROB that + // don't get squashed properly. 
+ int insts_to_process = min((int)renameWidth, fromRename->size); + + for (int inst_num = 0; + inst_num < insts_to_process; + ++inst_num) + { + if (!fromRename->insts[inst_num]->isSquashed()) { + DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", + fromRename->insts[inst_num]->readPC()); + rob->insertInst(fromRename->insts[inst_num]); + } else { + DPRINTF(Commit, "Commit: Instruction %i PC %#x was " + "squashed, skipping.\n", + fromRename->insts[inst_num]->seqNum, + fromRename->insts[inst_num]->readPC()); + } + } +} + +template <class Impl> +void +SimpleCommit<Impl>::markCompletedInsts() +{ + // Grab completed insts out of the IEW instruction queue, and mark + // instructions completed within the ROB. + for (int inst_num = 0; + inst_num < iewWidth && fromIEW->insts[inst_num]; + ++inst_num) + { + DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", + fromIEW->insts[inst_num]->readPC(), + fromIEW->insts[inst_num]->seqNum); + + // Mark the instruction as ready to commit. 
+ fromIEW->insts[inst_num]->setCanCommit(); + } +} + +template <class Impl> +uint64_t +SimpleCommit<Impl>::readCommitPC() +{ + return rob->readHeadPC(); +} + +#endif // __COMMIT_IMPL_HH__ diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh new file mode 100644 index 000000000..6606aba29 --- /dev/null +++ b/cpu/beta_cpu/cpu_policy.hh @@ -0,0 +1,60 @@ +#ifndef __CPU_BETA_CPU_CPU_POLICY_HH__ +#define __CPU_BETA_CPU_CPU_POLICY_HH__ + +#include "cpu/beta_cpu/bpred_unit.hh" +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/regfile.hh" +#include "cpu/beta_cpu/free_list.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/rob.hh" +#include "cpu/beta_cpu/store_set.hh" +#include "cpu/beta_cpu/mem_dep_unit.hh" +#include "cpu/beta_cpu/ldstq.hh" + +#include "cpu/beta_cpu/fetch.hh" +#include "cpu/beta_cpu/decode.hh" +#include "cpu/beta_cpu/rename.hh" +#include "cpu/beta_cpu/iew.hh" +#include "cpu/beta_cpu/commit.hh" + +#include "cpu/beta_cpu/comm.hh" + +template<class Impl> +struct SimpleCPUPolicy +{ + typedef TwobitBPredUnit<Impl> BPredUnit; + typedef PhysRegFile<Impl> RegFile; + typedef SimpleFreeList FreeList; + typedef SimpleRenameMap RenameMap; + typedef ROB<Impl> ROB; + typedef InstructionQueue<Impl> IQ; + typedef MemDepUnit<StoreSet, Impl> MemDepUnit; + typedef LDSTQ<Impl> LDSTQ; + + typedef SimpleFetch<Impl> Fetch; + typedef SimpleDecode<Impl> Decode; + typedef SimpleRename<Impl> Rename; + typedef SimpleIEW<Impl, IQ> IEW; + typedef SimpleCommit<Impl> Commit; + + /** The struct for communication between fetch and decode. */ + typedef SimpleFetchSimpleDecode<Impl> FetchStruct; + + /** The struct for communication between decode and rename. */ + typedef SimpleDecodeSimpleRename<Impl> DecodeStruct; + + /** The struct for communication between rename and IEW. */ + typedef SimpleRenameSimpleIEW<Impl> RenameStruct; + + /** The struct for communication between IEW and commit. 
*/ + typedef SimpleIEWSimpleCommit<Impl> IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct<Impl> IssueStruct; + + /** The struct for all backwards communication. */ + typedef TimeBufStruct TimeStruct; + +}; + +#endif //__CPU_BETA_CPU_CPU_POLICY_HH__ diff --git a/cpu/beta_cpu/decode.cc b/cpu/beta_cpu/decode.cc new file mode 100644 index 000000000..177293bca --- /dev/null +++ b/cpu/beta_cpu/decode.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/decode_impl.hh" + +template class SimpleDecode<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh new file mode 100644 index 000000000..dd18cf176 --- /dev/null +++ b/cpu/beta_cpu/decode.hh @@ -0,0 +1,145 @@ +// Todo: +// Add a couple of the branch fields to DynInst. Figure out where DynInst +// should try to compute the target of a PC-relative branch. Try to avoid +// having so many returns within the code. +// Fix up squashing too, as it's too +// dependent upon the iew stage continually telling it to squash. + +#ifndef __CPU_BETA_CPU_SIMPLE_DECODE_HH__ +#define __CPU_BETA_CPU_SIMPLE_DECODE_HH__ + +#include <queue> + +#include "base/statistics.hh" +#include "base/timebuf.hh" + +template<class Impl> +class SimpleDecode +{ + private: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + // Typedefs from the CPU policy. + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + // Typedefs from the ISA. + typedef typename ISA::Addr Addr; + + public: + // The only time decode will become blocked is if dispatch becomes + // blocked, which means IQ or ROB is probably full. 
+ enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking + }; + + private: + // May eventually need statuses on a per thread basis. + Status _status; + + public: + SimpleDecode(Params &params); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr); + + void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); + + void tick(); + + void decode(); + + // Might want to make squash a friend function. + void squash(); + + private: + void block(); + + inline void unblock(); + + void squash(DynInstPtr &inst); + + // Interfaces to objects outside of decode. + /** CPU interface. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get rename's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + // Might not be the best name as not only fetch will read it. + typename TimeBuffer<TimeStruct>::wire toFetch; + + /** Decode instruction queue. */ + TimeBuffer<DecodeStruct> *decodeQueue; + + /** Wire used to write any information heading to rename. */ + typename TimeBuffer<DecodeStruct>::wire toRename; + + /** Fetch instruction queue interface. */ + TimeBuffer<FetchStruct> *fetchQueue; + + /** Wire to get fetch's output from fetch queue. */ + typename TimeBuffer<FetchStruct>::wire fromFetch; + + /** Skid buffer between fetch and decode. */ + std::queue<FetchStruct> skidBuffer; + + private: + //Consider making these unsigned to avoid any confusion. + /** Rename to decode delay, in ticks. */ + unsigned renameToDecodeDelay; + + /** IEW to decode delay, in ticks. 
*/ + unsigned iewToDecodeDelay; + + /** Commit to decode delay, in ticks. */ + unsigned commitToDecodeDelay; + + /** Fetch to decode delay, in ticks. */ + unsigned fetchToDecodeDelay; + + /** The width of decode, in instructions. */ + unsigned decodeWidth; + + /** The instruction that decode is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; + + Stats::Scalar<> decodeIdleCycles; + Stats::Scalar<> decodeBlockedCycles; + Stats::Scalar<> decodeUnblockCycles; + Stats::Scalar<> decodeSquashCycles; + Stats::Scalar<> decodeBranchMispred; + Stats::Scalar<> decodeControlMispred; + Stats::Scalar<> decodeDecodedInsts; + Stats::Scalar<> decodeSquashedInsts; +}; + +#endif // __CPU_BETA_CPU_SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh new file mode 100644 index 000000000..9d88f94ac --- /dev/null +++ b/cpu/beta_cpu/decode_impl.hh @@ -0,0 +1,391 @@ +#include "cpu/beta_cpu/decode.hh" + +template<class Impl> +SimpleDecode<Impl>::SimpleDecode(Params &params) + : renameToDecodeDelay(params.renameToDecodeDelay), + iewToDecodeDelay(params.iewToDecodeDelay), + commitToDecodeDelay(params.commitToDecodeDelay), + fetchToDecodeDelay(params.fetchToDecodeDelay), + decodeWidth(params.decodeWidth), + numInst(0) +{ + DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); + _status = Idle; +} + +template <class Impl> +void +SimpleDecode<Impl>::regStats() +{ + decodeIdleCycles + .name(name() + ".decodeIdleCycles") + .desc("Number of cycles decode is idle") + .prereq(decodeIdleCycles); + decodeBlockedCycles + .name(name() + ".decodeBlockedCycles") + .desc("Number of cycles decode is blocked") + .prereq(decodeBlockedCycles); + decodeUnblockCycles + .name(name() + ".decodeUnblockCycles") + .desc("Number of cycles decode is unblocking") + .prereq(decodeUnblockCycles); + decodeSquashCycles + .name(name() + 
".decodeSquashCycles") + .desc("Number of cycles decode is squashing") + .prereq(decodeSquashCycles); + decodeBranchMispred + .name(name() + ".decodeBranchMispred") + .desc("Number of times decode detected a branch misprediction") + .prereq(decodeBranchMispred); + decodeControlMispred + .name(name() + ".decodeControlMispred") + .desc("Number of times decode detected an instruction incorrectly" + " predicted as a control") + .prereq(decodeControlMispred); + decodeDecodedInsts + .name(name() + ".decodeDecodedInsts") + .desc("Number of instructions handled by decode") + .prereq(decodeDecodedInsts); + decodeSquashedInsts + .name(name() + ".decodeSquashedInsts") + .desc("Number of squashed instructions handled by decode") + .prereq(decodeSquashedInsts); +} + +template<class Impl> +void +SimpleDecode<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Decode, "Decode: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template<class Impl> +void +SimpleDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Decode, "Decode: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. + toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. + fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +template<class Impl> +void +SimpleDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr) +{ + DPRINTF(Decode, "Decode: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +template<class Impl> +void +SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) +{ + DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. 
+ fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + +template<class Impl> +void +SimpleDecode<Impl>::block() +{ + DPRINTF(Decode, "Decode: Blocking.\n"); + + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template<class Impl> +inline void +SimpleDecode<Impl>::unblock() +{ + DPRINTF(Decode, "Decode: Unblocking, going to remove " + "instructions from skid buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Finished unblocking.\n"); + _status = Running; + } +} + +// This squash is specifically for when Decode detects a PC-relative branch +// was predicted incorrectly. +template<class Impl> +void +SimpleDecode<Impl>::squash(DynInstPtr &inst) +{ + DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " + "detected at decode.\n"); + Addr new_PC = inst->readNextPC(); + + toFetch->decodeInfo.branchMispredict = true; + toFetch->decodeInfo.doneSeqNum = inst->seqNum; + toFetch->decodeInfo.predIncorrect = true; + toFetch->decodeInfo.squash = true; + toFetch->decodeInfo.nextPC = new_PC; + toFetch->decodeInfo.branchTaken = true; + + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. 
+ while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template<class Impl> +void +SimpleDecode<Impl>::squash() +{ + DPRINTF(Decode, "Decode: Squashing.\n"); + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template<class Impl> +void +SimpleDecode<Impl>::tick() +{ + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + if (_status != Blocked && _status != Squashing) { + DPRINTF(Decode, "Decode: Not blocked, so attempting to run " + "stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + decode(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + ++decodeUnblockCycles; + + if (fromFetch->size > 0) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + } + + unblock(); + } + } else if (_status == Blocked) { + ++decodeBlockedCycles; + + if (fromFetch->size > 0) { + block(); + } + + if (!fromRename->renameInfo.stall && + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { + DPRINTF(Decode, "Decode: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this + // stage is done unblocking. 
/**
 * Performs one cycle of decode work.
 *
 * Returns early, after calling squash() or block(), when commit signals a
 * squash or when rename, IEW or commit signal a stall.  Otherwise reads
 * the instructions from the front of the skid buffer (when the status is
 * Unblocking) or from the fetch wire, skips squashed instructions, and
 * copies the remaining ones into the toRename wire.  An unconditional
 * direct control instruction has its next PC set to its branch target;
 * if it then reports a misprediction, squash(inst) is called and
 * processing stops for this cycle.
 *
 * numInst (not declared in this function; presumably a member -- TODO
 * confirm) is the read index into the input and is reset to 0 on exit
 * from the loop.
 */
template<class Impl>
void
SimpleDecode<Impl>::decode()
{
    // Check time buffer if being told to squash.
    if (fromCommit->commitInfo.squash) {
        squash();
        return;
    }

    // Check time buffer if being told to stall.
    if (fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        block();
        return;
    }

    // Check fetch queue to see if instructions are available.
    // If no available instructions, do nothing, unless this stage is
    // currently unblocking.
    if (fromFetch->size == 0 && _status != Unblocking) {
        DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
        // Should I change the status to idle?
        ++decodeIdleCycles;
        return;
    }

    // Might be better to use a base DynInst * instead?
    DynInstPtr inst;

    // Write index into toRename->insts; only advanced for instructions
    // that make it all the way through the loop body.
    unsigned to_rename_index = 0;

    // Input comes from the skid buffer while unblocking, otherwise
    // straight from fetch.
    int insts_available = _status == Unblocking ?
        skidBuffer.front().size :
        fromFetch->size;

    // Debug block...
#if 0
    if (insts_available) {
        DPRINTF(Decode, "Decode: Instructions available.\n");
    } else {
        if (_status == Unblocking && skidBuffer.empty()) {
            DPRINTF(Decode, "Decode: No instructions available, skid buffer "
                    "empty.\n");
        } else if (_status != Unblocking &&
                   !fromFetch->insts[0]) {
            DPRINTF(Decode, "Decode: No instructions available, fetch queue "
                    "empty.\n");
        } else {
            panic("Decode: No instructions available, unexpected condition!"
                  "\n");
        }
    }
#endif

    while (insts_available > 0)
    {
        DPRINTF(Decode, "Decode: Sending instruction to rename.\n");

        inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
            fromFetch->insts[numInst];

        DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n",
                inst->seqNum, inst->readPC());

        // Squashed instructions are counted and dropped; they consume an
        // input slot but no output slot.
        if (inst->isSquashed()) {
            DPRINTF(Decode, "Decode: Instruction %i with PC %#x is "
                    "squashed, skipping.\n",
                    inst->seqNum, inst->readPC());

            ++decodeSquashedInsts;

            ++numInst;
            --insts_available;

            continue;
        }

        // This current instruction is valid, so add it into the decode
        // queue.  The next instruction may not be valid, so check to
        // see if branches were predicted correctly.
        toRename->insts[to_rename_index] = inst;

        ++(toRename->size);

        // Ensure that if it was predicted as a branch, it really is a
        // branch.
        if (inst->predTaken() && !inst->isControl()) {
            panic("Instruction predicted as a branch!");

            // NOTE(review): if panic() does not return (presumably it
            // aborts the simulation -- confirm), the three statements
            // below are unreachable and decodeControlMispred is never
            // incremented.  Decide whether this case should abort or
            // squash.
            ++decodeControlMispred;
            // Might want to set some sort of boolean and just do
            // a check at the end
            squash(inst);
            break;
        }

        // Go ahead and compute any PC-relative branches.

        if (inst->isDirectCtrl() && inst->isUncondCtrl()) {

            inst->setNextPC(inst->branchTarget());

            if (inst->mispredicted()) {
                ++decodeBranchMispred;
                // Might want to set some sort of boolean and just do
                // a check at the end
                squash(inst);
                break;
            }
        }

        // Normally can check if a direct branch has the right target
        // addr (either the immediate, or the branch PC + 4) and redirect
        // fetch if it's incorrect.


        // Also check if instructions have no source registers.  Mark
        // them as ready to issue at any time.  Not sure if this check
        // should exist here or at a later stage; however it doesn't matter
        // too much for functional correctness.
        // Isn't this handled by the inst queue?
        if (inst->numSrcRegs() == 0) {
            inst->setCanIssue();
        }

        // Increment which instruction we're looking at.
        ++numInst;
        ++to_rename_index;
        ++decodeDecodedInsts;

        --insts_available;
    }

    // Reset the read index for the next call; this also runs after a
    // break out of the loop above.
    numInst = 0;
}
*/ + typedef typename ISA::MachInst MachInst; + + public: + enum Status { + Running, + Idle, + Squashing, + Blocked, + IcacheMissStall, + IcacheMissComplete + }; + + // May eventually need statuses on a per thread basis. + Status _status; + + bool stalled; + + public: + /** SimpleFetch constructor. */ + SimpleFetch(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); + + void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); + + void tick(); + + void fetch(); + + void processCacheCompletion(); + +// private: + // Figure out PC vs next PC and how it should be updated + void squash(const Addr &new_PC); + + private: + inline void doSquash(const Addr &new_PC); + + void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); + + /** + * Looks up in the branch predictor to see if the next PC should be + * either next PC+=MachInst or a branch target. + * @params next_PC Next PC variable passed in by reference. It is + * expected to be set to the current PC; it will be updated with what + * the next PC will be. + * @return Whether or not a branch was predicted as taken. + */ + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + + // Might not want this function... +// inline void recordGlobalHist(DynInstPtr &inst); + + /** + * Fetches the cache line that contains fetch_PC. Returns any + * fault that happened. Puts the data into the class variable + * cacheData. + * @params fetch_PC The PC address that is being fetched from. + * @return Any fault that occured. + */ + Fault fetchCacheLine(Addr fetch_PC); + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. 
+ Addr icacheBlockAlignPC(Addr addr) + { + addr = ISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + public: + class CacheCompletionEvent : public Event + { + private: + SimpleFetch *fetch; + + public: + CacheCompletionEvent(SimpleFetch *_fetch); + + virtual void process(); + virtual const char *description(); + }; + +// CacheCompletionEvent cacheCompletionEvent; + + private: + /** Pointer to the FullCPU. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get decode's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromDecode; + + /** Wire to get rename's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Internal fetch instruction queue. */ + TimeBuffer<FetchStruct> *fetchQueue; + + //Might be annoying how this name is different than the queue. + /** Wire used to write any information heading to decode. */ + typename TimeBuffer<FetchStruct>::wire toDecode; + + /** Icache interface. */ + MemInterface *icacheInterface; + + /** BPredUnit. */ + BPredUnit branchPred; + + /** Memory request used to access cache. */ + MemReqPtr memReq; + + /** Decode to fetch delay, in ticks. */ + unsigned decodeToFetchDelay; + + /** Rename to fetch delay, in ticks. */ + unsigned renameToFetchDelay; + + /** IEW to fetch delay, in ticks. */ + unsigned iewToFetchDelay; + + /** Commit to fetch delay, in ticks. */ + unsigned commitToFetchDelay; + + /** The width of fetch in instructions. */ + unsigned fetchWidth; + + /** Cache block size. */ + int cacheBlkSize; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + /** The instruction being fetched. 
// Remove this later; used only for debugging.
// Extracts bits 31:26 of X (a 6-bit field).  Both the argument and the
// whole expansion are parenthesized so that the macro behaves like a
// function call: OPCODE(a | b) shifts the complete argument, and
// OPCODE(x) == y compares the extracted field rather than binding
// "0x3f == y" first.
#define OPCODE(X) (((X) >> 26) & 0x3f)
I think it should be based on the + // thread number. +#ifndef FULL_SYSTEM + memReq->asid = params.asid; +#else + memReq->asid = 0; +#endif // FULL_SYSTEM + memReq->data = new uint8_t[64]; + + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Get the size of an instruction. + instSize = sizeof(MachInst); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; +} + +template <class Impl> +void +SimpleFetch<Impl>::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetch_nisn_dist + .init(/* base value */ 0, + /* last value */ fetchWidth, + /* bucket size */ 1) + .name(name() + ".FETCH:rate_dist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf) + ; + + branchPred.regStats(); +} + +template<class Impl> +void +SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); + cpu = cpu_ptr; 
+ // This line will be removed eventually. + memReq->xc = cpu->xcBase(); +} + +template<class Impl> +void +SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) +{ + DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template<class Impl> +void +SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toDecode = fetchQueue->getWire(0); +} + +template<class Impl> +void +SimpleFetch<Impl>::processCacheCompletion() +{ + DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); + + // Only change the status if it's still waiting on the icache access + // to return. + // Can keep track of how many cache accesses go unused due to + // misspeculation here. + // How to handle an outstanding miss which gets cancelled due to squash, + // then a new icache miss gets scheduled? + if (_status == IcacheMissStall) + _status = IcacheMissComplete; +} + +#if 0 +template <class Impl> +inline void +SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst) +{ + inst->setGlobalHist(branchPred.BPReadGlobalHist()); +} +#endif + +template <class Impl> +bool +SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +{ + // Do branch prediction check here. + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. 
/**
 * Reads the cache block containing fetch_PC into cacheData and, when an
 * icache interface is present, performs the timing access for it.
 *
 * The request is block-aligned, translated through the CPU, and read with
 * cpu->mem->read().  If the timing access does not hit and the interface
 * reports doEvents(), a CacheCompletionEvent is attached to memReq and
 * the stage status becomes IcacheMissStall.
 *
 * @param fetch_PC The PC address that is being fetched from.
 * @return The fault from translation or from the read (No_Fault on
 * success).
 */
template <class Impl>
Fault
SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
{
    // Check if the instruction exists within the cache.
    // If it does, then proceed on to read the instruction and the rest
    // of the instructions in the cache line until either the end of the
    // cache line or a predicted taken branch is encountered.

#ifdef FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    Fault fault = No_Fault;

    // Align the fetch PC so it's at the start of a cache block.
    fetch_PC = icacheBlockAlignPC(fetch_PC);

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    memReq->cmd = Read;
    memReq->reset(fetch_PC, cacheBlkSize, flags);

    // Translate the instruction request.
    // Should this function be in the CPU class?  Probably...ITB/DTB
    // should exist within the CPU.

    fault = cpu->translateInstReq(memReq);

    // In the case of faults, the fetch stage may need to stall and wait
    // on what caused the fetch (ITB or Icache miss).

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == No_Fault) {
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
        fault = cpu->mem->read(memReq, cacheData);
        // This read may change when the mem interface changes.

        fetchedCacheLines++;
    }

    // Now do the timing access to see whether or not the instruction
    // exists within the cache.
    if (icacheInterface && fault == No_Fault) {
        DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
        memReq->completionEvent = NULL;

        memReq->time = curTick;

        MemAccessResult result = icacheInterface->access(memReq);

        // If the cache missed (in this model functional and timing
        // memories are different), then schedule an event to wake
        // up this stage once the cache miss completes.
        if (result != MA_HIT && icacheInterface->doEvents()) {
            // NOTE(review): a new event object is allocated on every miss
            // and nothing in this function frees it -- confirm that the
            // event queue (or the Event base class) takes ownership.
            // NOTE(review): the event is attached after access() has
            // already returned; presumably the memory system still holds
            // this memReq and reads completionEvent later -- verify.
            memReq->completionEvent = new CacheCompletionEvent(this);
//            lastIcacheStall = curTick;

            // How does current model work as far as individual
            // stages scheduling/unscheduling?
            // Perhaps have only the main CPU scheduled/unscheduled,
            // and have it choose what stages to run appropriately.

            DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
            _status = IcacheMissStall;
        }
    }

    return fault;
}
/**
 * Advances the fetch stage by one cycle.
 *
 * Signals are examined in a fixed order, and the first one that applies
 * ends the cycle: a squash from commit, an ongoing ROB squash, a squash
 * from decode, then any stall signal.  Only when none of those apply does
 * the stage either leave the Squashing state or call fetch().
 */
template<class Impl>
void
SimpleFetch<Impl>::tick()
{
    // Check squash signals from commit.
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        // In any case, squash.
        squash(fromCommit->commitInfo.nextPC);

        // Also check if there's a mispredict that happened.
        if (fromCommit->commitInfo.branchMispredict) {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum,
                              fromCommit->commitInfo.nextPC,
                              fromCommit->commitInfo.branchTaken);
        } else {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum);
        }

        return;
    } else if (fromCommit->commitInfo.doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
        branchPred.update(fromCommit->commitInfo.doneSeqNum);
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");

        // Continue to squash.
        _status = Squashing;

        ++fetchSquashCycles;
        return;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        // Update the branch predictor.
        if (fromDecode->decodeInfo.branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
                              fromDecode->decodeInfo.nextPC,
                              fromDecode->decodeInfo.branchTaken);
        } else {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
        }

        // When already squashing, the decode redirect is not applied and
        // control falls through to the stall checks below.
        if (_status != Squashing) {
            // Squash unless we're already squashing?
            squashFromDecode(fromDecode->decodeInfo.nextPC,
                             fromDecode->decodeInfo.doneSeqNum);
            return;
        }
    }

    // Check if any of the stall signals are high.
    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        // Block stage, regardless of current status.
        // NOTE(review): this also overwrites IcacheMissStall and
        // IcacheMissComplete, so an in-flight miss completion would no
        // longer be observed by this stage -- confirm that is intended.

        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);

        _status = Blocked;

        ++fetchBlockedCycles;
        return;
    } else if (_status == Blocked) {
        // Unblock stage if status is currently blocked and none of the
        // stall signals are being held high.  The unblocking cycle itself
        // is still counted as blocked and no fetch happens in it.
        _status = Running;

        ++fetchBlockedCycles;
        return;
    }

    // If fetch has reached this point, then there are no squash signals
    // still being held high.  Check if fetch is in the squashing state;
    // if so, fetch can switch to running.
    // Similarly, there are no blocked signals still being held high.
    // Check if fetch is in the blocked state; if so, fetch can switch to
    // running.
    if (_status == Squashing) {
        DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");

        // Switch status to running
        _status = Running;

        ++fetchSquashCycles;
    } else if (_status != IcacheMissStall) {
        // Any remaining state other than an outstanding Icache miss
        // performs a fetch this cycle.
        DPRINTF(Fetch, "Fetch: Running stage.\n");

        ++fetchCycles;

        fetch();
    }
}
+ memReq->completionEvent = NULL; + + _status = Running; + } else { + DPRINTF(Fetch, "Fetch: Attempting to translate and read " + "instruction, starting at PC %08p.\n", + fetch_PC); + + fault = fetchCacheLine(fetch_PC); + } + + // If we had a stall due to an icache miss, then return. It'd + // be nicer if this were handled through the kind of fault that + // is returned by the function. + if (_status == IcacheMissStall) { + return; + } + + // As far as timing goes, the CPU will need to send an event through + // the MemReq in order to be woken up once the memory access completes. + // Probably have a status on a per thread basis so each thread can + // block independently and be woken up independently. + + Addr next_PC = fetch_PC; + InstSeqNum inst_seq; + MachInst inst; + unsigned offset = fetch_PC & cacheBlkMask; + unsigned fetched; + + if (fault == No_Fault) { + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + + DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + + ////////////////////////// + // Fetch first instruction + ////////////////////////// + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predicted_branch = false; + + for (fetched = 0; + offset < cacheBlkSize && + fetched < fetchWidth && + !predicted_branch; + ++fetched) + { + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - instSize); + + // Get the instruction from the array of the cache line. + inst = htoa(*reinterpret_cast<MachInst *> + (&cacheData[offset])); + + // Create a new DynInst from the instruction fetched. 
+ DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, + inst_seq, cpu); + + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + inst_seq, instruction->readPC()); + + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + + // Add instruction to the CPU's list of instructions. + cpu->addInst(instruction); + + // Write the instruction to the first slot in the queue + // that heads to decode. + toDecode->insts[fetched] = instruction; + + toDecode->size++; + + // Increment stat of fetched instructions. + ++fetchedInsts; + + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + + offset+= instSize; + } + + fetch_nisn_dist.sample(fetched); + } + + // Now that fetching is completed, update the PC to signify what the next + // cycle will be. Might want to move this to the beginning of this + // function so that the PC updates at the beginning of everything. + // Or might want to leave setting the PC to the main CPU, with fetch + // only changing the nextPC (will require correct determination of + // next PC). + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); + cpu->setPC(next_PC); + cpu->setNextPC(next_PC + instSize); + } else { + // If the issue was an icache miss, then we can just return and + // wait until it is handled. + if (_status == IcacheMissStall) { + return; + } + + // Handle the fault. + // This stage will not be able to continue until all the ROB + // slots are empty, at which point the fault can be handled. + // The only other way it can wake up is if a squash comes along + // and changes the PC. 
Not sure how to handle that case...perhaps + // have it handled by the upper level CPU class which peeks into the + // time buffer and sees if a squash comes along, in which case it + // changes the status. + + DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); + + _status = Blocked; +#ifdef FULL_SYSTEM +// cpu->trap(fault); + // Send a signal to the ROB indicating that there's a trap from the + // fetch stage that needs to be handled. Need to indicate that + // there's a fault, and the fault type. +#else // !FULL_SYSTEM + fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); +#endif // FULL_SYSTEM + } +} diff --git a/cpu/beta_cpu/free_list.cc b/cpu/beta_cpu/free_list.cc new file mode 100644 index 000000000..542b87471 --- /dev/null +++ b/cpu/beta_cpu/free_list.cc @@ -0,0 +1,54 @@ +#include "base/trace.hh" + +#include "cpu/beta_cpu/free_list.hh" + +SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) +{ + DPRINTF(FreeList, "FreeList: Creating new free list object.\n"); + + // DEBUG stuff. + freeIntRegsScoreboard.resize(numPhysicalIntRegs); + + freeFloatRegsScoreboard.resize(numPhysicalRegs); + + for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) { + freeIntRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. 
+ for (PhysRegIndex i = numLogicalIntRegs; + i < numPhysicalIntRegs; ++i) + { + freeIntRegs.push(i); + + freeIntRegsScoreboard[i] = 1; + } + + for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs; + ++i) + { + freeFloatRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. Because the + // float registers' indices start where the physical registers end, + // some math must be done to determine where the free registers start. + for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; + i < numPhysicalRegs; ++i) + { + freeFloatRegs.push(i); + + freeFloatRegsScoreboard[i] = 1; + } +} + diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh new file mode 100644 index 000000000..e8e75f7ec --- /dev/null +++ b/cpu/beta_cpu/free_list.hh @@ -0,0 +1,169 @@ +#ifndef __FREE_LIST_HH__ +#define __FREE_LIST_HH__ + +#include <iostream> +#include <queue> + +#include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/comm.hh" +#include "base/traceflags.hh" +#include "base/trace.hh" + +/** + * FreeList class that simply holds the list of free integer and floating + * point registers. Can request for a free register of either type, and + * also send back free registers of either type. This is a very simple + * class, but it should be sufficient for most implementations. Like all + * other classes, it assumes that the indices for the floating point + * registers starts after the integer registers end. Hence the variable + * numPhysicalIntRegs is logically equivalent to the baseFP dependency. + * Note that + * while this most likely should be called FreeList, the name "FreeList" + * is used in a typedef within the CPU Policy, and therefore no class + * can be named simply "FreeList". + * @todo: Give a better name to the base FP dependency. + */ +class SimpleFreeList +{ + public: + + private: + /** The list of free integer registers. 
*/ + std::queue<PhysRegIndex> freeIntRegs; + + /** The list of free floating point registers. */ + std::queue<PhysRegIndex> freeFloatRegs; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Total number of physical registers. */ + int numPhysicalRegs; + + /** DEBUG stuff below. */ + std::vector<int> freeIntRegsScoreboard; + + std::vector<bool> freeFloatRegsScoreboard; + + public: + SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs); + + PhysRegIndex getIntReg(); + + PhysRegIndex getFloatReg(); + + void addReg(PhysRegIndex freed_reg); + + void addIntReg(PhysRegIndex freed_reg); + + void addFloatReg(PhysRegIndex freed_reg); + + bool hasFreeIntRegs() + { return !freeIntRegs.empty(); } + + bool hasFreeFloatRegs() + { return !freeFloatRegs.empty(); } + + int numFreeIntRegs() + { return freeIntRegs.size(); } + + int numFreeFloatRegs() + { return freeFloatRegs.size(); } +}; + +inline PhysRegIndex +SimpleFreeList::getIntReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free integer register.\n"); + if (freeIntRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeIntRegs.front(); + + freeIntRegs.pop(); + + // DEBUG + assert(freeIntRegsScoreboard[free_reg]); + freeIntRegsScoreboard[free_reg] = 0; + + return(free_reg); +} + +inline PhysRegIndex +SimpleFreeList::getFloatReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free float register.\n"); + if (freeFloatRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeFloatRegs.front(); + + freeFloatRegs.pop(); + + // DEBUG + assert(freeFloatRegsScoreboard[free_reg]); + 
freeFloatRegsScoreboard[free_reg] = 0; + + return(free_reg); +} + +inline void +SimpleFreeList::addReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg); + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + if (freed_reg < numPhysicalIntRegs) { + freeIntRegs.push(freed_reg); + + // DEBUG + assert(freeIntRegsScoreboard[freed_reg] == false); + freeIntRegsScoreboard[freed_reg] = 1; + } else if (freed_reg < numPhysicalRegs) { + freeFloatRegs.push(freed_reg); + + // DEBUG + assert(freeFloatRegsScoreboard[freed_reg] == false); + freeFloatRegsScoreboard[freed_reg] = 1; + } +} + +inline void +SimpleFreeList::addIntReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + + // DEBUG + assert(!freeIntRegsScoreboard[freed_reg]); + freeIntRegsScoreboard[freed_reg] = 1; + + freeIntRegs.push(freed_reg); +} + +inline void +SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + + // DEBUG + assert(!freeFloatRegsScoreboard[freed_reg]); + freeFloatRegsScoreboard[freed_reg] = 1; + + freeFloatRegs.push(freed_reg); +} + +#endif // __FREE_LIST_HH__ diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc new file mode 100644 index 000000000..d5228601c --- /dev/null +++ b/cpu/beta_cpu/full_cpu.cc @@ -0,0 +1,520 @@ +#ifndef __SIMPLE_FULL_CPU_CC__ +#define __SIMPLE_FULL_CPU_CC__ + +#ifdef FULL_SYSTEM +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif +#include "sim/universe.hh" + +#include "cpu/exec_context.hh" +#include "cpu/beta_cpu/full_cpu.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +using namespace std; + +#ifdef FULL_SYSTEM +BaseFullCPU::BaseFullCPU(Params ¶ms) + : BaseCPU(params.name, params.numberOfThreads, + params.maxInstsAnyThread, params.maxInstsAllThreads, + 
+              params.maxLoadsAnyThread, params.maxLoadsAllThreads,
+              params._system, params.freq)
+{
+}
+#else
+// Non-full-system build: forwards only the common parameters to BaseCPU.
+BaseFullCPU::BaseFullCPU(Params &params)
+    : BaseCPU(params.name, params.numberOfThreads,
+              params.maxInstsAnyThread, params.maxInstsAllThreads,
+              params.maxLoadsAnyThread, params.maxLoadsAllThreads)
+{
+}
+#endif // FULL_SYSTEM
+
+// The tick event is created on the main event queue with CPU tick priority
+// and keeps a pointer to the CPU it drives.
+template <class Impl>
+FullBetaCPU<Impl>::TickEvent::TickEvent(FullBetaCPU<Impl> *c)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+{
+}
+
+// Processing the event runs one tick of the owning CPU.
+template <class Impl>
+void
+FullBetaCPU<Impl>::TickEvent::process()
+{
+    cpu->tick();
+}
+
+template <class Impl>
+const char *
+FullBetaCPU<Impl>::TickEvent::description()
+{
+    return "FullBetaCPU tick event";
+}
+
+//Call constructor to all the pipeline stages here
+template <class Impl>
+FullBetaCPU<Impl>::FullBetaCPU(Params &params)
+#ifdef FULL_SYSTEM
+    : BaseFullCPU(params),
+#else
+    : BaseFullCPU(params),
+#endif // FULL_SYSTEM
+      tickEvent(this),
+      fetch(params),
+      decode(params),
+      rename(params),
+      iew(params),
+      commit(params),
+
+      regFile(params.numPhysIntRegs, params.numPhysFloatRegs),
+
+      freeList(Impl::ISA::NumIntRegs, params.numPhysIntRegs,
+               Impl::ISA::NumFloatRegs, params.numPhysFloatRegs),
+
+      renameMap(Impl::ISA::NumIntRegs, params.numPhysIntRegs,
+                Impl::ISA::NumFloatRegs, params.numPhysFloatRegs,
+                Impl::ISA::NumMiscRegs,
+                Impl::ISA::ZeroReg,
+                Impl::ISA::ZeroReg + Impl::ISA::NumIntRegs),
+
+      rob(params.numROBEntries, params.squashWidth),
+
+      // What to pass to these time buffers?
+      // For now just have these time buffers be pretty big.
+      timeBuffer(5, 5),
+      fetchQueue(5, 5),
+      decodeQueue(5, 5),
+      renameQueue(5, 5),
+      iewQueue(5, 5),
+
+      xc(NULL),
+
+      globalSeqNum(1),
+
+#ifdef FULL_SYSTEM
+      system(params.system),
+      memCtrl(system->memCtrl),
+      physmem(system->physmem),
+      itb(params.itb),
+      dtb(params.dtb),
+      mem(params.mem),
+#else
+      process(params.process),
+      asid(params.asid),
+      mem(process->getMemory()),
+#endif // FULL_SYSTEM
+
+      icacheInterface(params.icacheInterface),
+      dcacheInterface(params.dcacheInterface),
+      deferRegistration(params.defReg),
+      numInsts(0),
+      funcExeInst(0)
+{
+    _status = Idle;
+#ifdef FULL_SYSTEM
+    xc = new ExecContext(this, 0, system, itb, dtb, mem);
+
+    // initialize CPU, including PC
+    TheISA::initCPU(&xc->regs);
+#else
+    // Terminate the message with a newline like every other trace message,
+    // so the next trace line does not run into it.
+    DPRINTF(FullCPU,
+            "FullCPU: Process's starting PC is %#x, process is %#x\n",
+            process->prog_entry, process);
+    xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0);
+
+    assert(process->getMemory() != NULL);
+    assert(mem != NULL);
+#endif // !FULL_SYSTEM
+    execContexts.push_back(xc);
+
+    // The stages also need their CPU pointer setup. However this must be
+    // done at the upper level CPU because they have pointers to the upper
+    // level CPU, and not this FullBetaCPU.
+
+    // Give each of the stages the time buffer they will use.
+    fetch.setTimeBuffer(&timeBuffer);
+    decode.setTimeBuffer(&timeBuffer);
+    rename.setTimeBuffer(&timeBuffer);
+    iew.setTimeBuffer(&timeBuffer);
+    commit.setTimeBuffer(&timeBuffer);
+
+    // Also setup each of the stages' queues.  Each queue is written by
+    // one stage and read by the next one (commit also reads the rename
+    // queue).
+    fetch.setFetchQueue(&fetchQueue);
+    decode.setFetchQueue(&fetchQueue);
+    decode.setDecodeQueue(&decodeQueue);
+    rename.setDecodeQueue(&decodeQueue);
+    rename.setRenameQueue(&renameQueue);
+    iew.setRenameQueue(&renameQueue);
+    iew.setIEWQueue(&iewQueue);
+    commit.setIEWQueue(&iewQueue);
+    commit.setRenameQueue(&renameQueue);
+
+    // Setup the rename map for whichever stages need it.
+    rename.setRenameMap(&renameMap);
+    iew.setRenameMap(&renameMap);
+
+    // Setup the free list for whichever stages need it.
+    rename.setFreeList(&freeList);
+    renameMap.setFreeList(&freeList);
+
+    // Setup the ROB for whichever stages need it.
+    commit.setROB(&rob);
+}
+
+template <class Impl>
+FullBetaCPU<Impl>::~FullBetaCPU()
+{
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::fullCPURegStats()
+{
+    // Register any of the FullCPU's stats here.
+}
+
+// Runs one cycle: ticks every stage in pipeline order, advances all time
+// buffers, and re-schedules the tick event for the next tick while the CPU
+// is Running.
+template <class Impl>
+void
+FullBetaCPU<Impl>::tick()
+{
+    DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullBetaCPU.\n");
+
+    //Tick each of the stages if they're actually running.
+    //Will want to figure out a way to unschedule itself if they're all
+    //going to be idle for a long time.
+    fetch.tick();
+
+    decode.tick();
+
+    rename.tick();
+
+    iew.tick();
+
+    commit.tick();
+
+    // Now advance the time buffers.  NOTE: this is done unconditionally;
+    // nothing here skips the advance for a stalled stage.
+    timeBuffer.advance();
+
+    fetchQueue.advance();
+    decodeQueue.advance();
+    renameQueue.advance();
+    iewQueue.advance();
+
+    if (_status == Running && !tickEvent.scheduled())
+        tickEvent.schedule(curTick + 1);
+}
+
+// Unless registration is deferred, registers the exec contexts and copies
+// the architectural state held in xc->regs into the CPU's register file.
+template <class Impl>
+void
+FullBetaCPU<Impl>::init()
+{
+    if(!deferRegistration)
+    {
+        this->registerExecContexts();
+
+        // Need to do a copy of the xc->regs into the CPU's regfile so
+        // that it can start properly.
+
+        // First loop through the integer registers.
+        for (int i = 0; i < Impl::ISA::NumIntRegs; ++i)
+        {
+            regFile.intRegFile[i] = xc->regs.intRegFile[i];
+        }
+
+        // Then loop through the floating point registers.
+        for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i)
+        {
+            regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i];
+            regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i];
+        }
+
+        // Then copy the misc registers, field by field.
+        regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr;
+        regFile.miscRegs.uniq = xc->regs.miscRegs.uniq;
+        regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag;
+        regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr;
+
+        // Then finally set the PC and the next PC.
+        regFile.pc = xc->regs.pc;
+        regFile.npc = xc->regs.npc;
+    }
+}
+
+// Schedules the tick event `delay` ticks from now and marks the CPU as
+// Running.  thread_num is not used.
+template <class Impl>
+void
+FullBetaCPU<Impl>::activateContext(int thread_num, int delay)
+{
+    // Needs to set each stage to running as well.
+
+    scheduleTickEvent(delay);
+
+    _status = Running;
+}
+
+// Not implemented: panics when called.
+template <class Impl>
+void
+FullBetaCPU<Impl>::suspendContext(int thread_num)
+{
+    panic("suspendContext unimplemented!");
+}
+
+// Not implemented: panics when called.
+template <class Impl>
+void
+FullBetaCPU<Impl>::deallocateContext(int thread_num)
+{
+    panic("deallocateContext unimplemented!");
+}
+
+// Not implemented: panics when called.
+template <class Impl>
+void
+FullBetaCPU<Impl>::haltContext(int thread_num)
+{
+    panic("haltContext unimplemented!");
+}
+
+// Not implemented: panics when called.
+template <class Impl>
+void
+FullBetaCPU<Impl>::switchOut()
+{
+    panic("FullBetaCPU does not have a switch out function.\n");
+}
+
+// Takes over from oldCPU: runs the BaseCPU hand-over, schedules this CPU's
+// tick event for the current tick, activates every exec context, and then
+// switches the old CPU out.  The tick event must not already be scheduled.
+template <class Impl>
+void
+FullBetaCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
+
+    assert(!tickEvent.scheduled());
+
+    // Set all status's to active, schedule the
+    // CPU's tick event.
+    tickEvent.schedule(curTick);
+    for (int i = 0; i < execContexts.size(); ++i) {
+        execContexts[i]->activate();
+    }
+
+    // Switch out the other CPU.
+    oldCPU->switchOut();
+}
+
+template <class Impl>
+InstSeqNum
+FullBetaCPU<Impl>::getAndIncrementInstSeq()
+{
+    // Hopefully this works right.
+    // Post-increment: the caller gets the current number, the next caller
+    // gets the following one.
+    return globalSeqNum++;
+}
+
+// The register and PC accessors below all forward to the register file.
+template <class Impl>
+uint64_t
+FullBetaCPU<Impl>::readIntReg(int reg_idx)
+{
+    return regFile.readIntReg(reg_idx);
+}
+
+template <class Impl>
+float
+FullBetaCPU<Impl>::readFloatRegSingle(int reg_idx)
+{
+    return regFile.readFloatRegSingle(reg_idx);
+}
+
+template <class Impl>
+double
+FullBetaCPU<Impl>::readFloatRegDouble(int reg_idx)
+{
+    return regFile.readFloatRegDouble(reg_idx);
+}
+
+template <class Impl>
+uint64_t
+FullBetaCPU<Impl>::readFloatRegInt(int reg_idx)
+{
+    return regFile.readFloatRegInt(reg_idx);
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::setIntReg(int reg_idx, uint64_t val)
+{
+    regFile.setIntReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::setFloatRegSingle(int reg_idx, float val)
+{
+    regFile.setFloatRegSingle(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::setFloatRegDouble(int reg_idx, double val)
+{
+    regFile.setFloatRegDouble(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::setFloatRegInt(int reg_idx, uint64_t val)
+{
+    regFile.setFloatRegInt(reg_idx, val);
+}
+
+template <class Impl>
+uint64_t
+FullBetaCPU<Impl>::readPC()
+{
+    return regFile.readPC();
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::setNextPC(uint64_t val)
+{
+    regFile.setNextPC(val);
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::setPC(Addr new_PC)
+{
+    regFile.setPC(new_PC);
+}
+
+// Appends the instruction to the back of the in-flight instruction list.
+template <class Impl>
+void
+FullBetaCPU<Impl>::addInst(DynInstPtr &inst)
+{
+    instList.push_back(inst);
+}
+
+// Counts one completed instruction and lets event queue 0 service any
+// event that is due at the new count.
+template <class Impl>
+void
+FullBetaCPU<Impl>::instDone()
+{
+    // Keep an instruction count.
+    numInsts++;
+
+    // Check for instruction-count-based events.
+    comInstEventQueue[0]->serviceEvents(numInsts);
+}
+
+template <class Impl>
+void
+FullBetaCPU<Impl>::removeBackInst(DynInstPtr &inst)
+{
+    DynInstPtr inst_to_delete;
+
+    // Walk through the instruction list, removing any instructions
+    // that were inserted after the given instruction, inst.
+ while (instList.back() != inst) + { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Mark it as squashed. + inst_to_delete->setSquashed(); + } +} + +template <class Impl> +void +FullBetaCPU<Impl>::removeFrontInst(DynInstPtr &inst) +{ + DynInstPtr inst_to_remove; + + // The front instruction should be the same one being asked to be removed. + assert(instList.front() == inst); + + // Remove the front instruction. + inst_to_remove = inst; + instList.pop_front(); + + DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n", + inst_to_remove, inst_to_remove->readPC()); +} + +template <class Impl> +void +FullBetaCPU<Impl>::removeInstsNotInROB() +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr rob_tail = rob.readTailInst(); + + removeBackInst(rob_tail); +} + +template <class Impl> +void +FullBetaCPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num) +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr inst_to_delete; + + while (instList.back()->seqNum > seq_num) { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Mark it as squashed. 
+ inst_to_delete->setSquashed(); + } + +} + +template <class Impl> +void +FullBetaCPU<Impl>::removeAllInsts() +{ + instList.clear(); +} + +template <class Impl> +void +FullBetaCPU<Impl>::dumpInsts() +{ + int num = 0; + typename list<DynInstPtr>::iterator inst_list_it = instList.begin(); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", + num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, + (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); + inst_list_it++; + ++num; + } +} + +template <class Impl> +void +FullBetaCPU<Impl>::wakeDependents(DynInstPtr &inst) +{ + iew.wakeDependents(inst); +} + +// Forward declaration of FullBetaCPU. +template FullBetaCPU<AlphaSimpleImpl>; + +#endif // __SIMPLE_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh new file mode 100644 index 000000000..19eb972d9 --- /dev/null +++ b/cpu/beta_cpu/full_cpu.hh @@ -0,0 +1,342 @@ +//Todo: Add in a lot of the functions that are ISA specific. Also define +//the functions that currently exist within the base cpu class. Define +//everything for the simobject stuff so it can be serialized and +//instantiated, add in debugging statements everywhere. Have CPU schedule +//itself properly. Constructor. Derived alpha class. Threads! +// Avoid running stages and advancing queues if idle/stalled. + +#ifndef __SIMPLE_FULL_CPU_HH__ +#define __SIMPLE_FULL_CPU_HH__ + +#include <iostream> +#include <list> + +#include "cpu/beta_cpu/comm.hh" + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "cpu/beta_cpu/cpu_policy.hh" +#include "sim/process.hh" + +class FunctionalMemory; +class Process; + +class BaseFullCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. 
+ public: + class Params + { + public: +#ifdef FULL_SYSTEM + std::string name; + int numberOfThreads; + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; + System *_system; + Tick freq; +#else + std::string name; + int numberOfThreads; + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; +#endif // FULL_SYSTEM + }; + +#ifdef FULL_SYSTEM + BaseFullCPU(Params ¶ms); +#else + BaseFullCPU(Params ¶ms); +#endif // FULL_SYSTEM +}; + +template <class Impl> +class FullBetaCPU : public BaseFullCPU +{ + public: + //Put typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + enum Status { + Running, + Idle, + Halted, + Blocked // ? + }; + + Status _status; + + private: + class TickEvent : public Event + { + private: + FullBetaCPU<Impl> *cpu; + + public: + TickEvent(FullBetaCPU<Impl> *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + public: + void tick(); + + FullBetaCPU(Params ¶ms); + ~FullBetaCPU(); + + void init(); + + void fullCPURegStats(); + + void activateContext(int thread_num, int delay); + void suspendContext(int thread_num); + void deallocateContext(int thread_num); + void haltContext(int thread_num); + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. 
*/
+    InstSeqNum getAndIncrementInstSeq();
+
+#ifdef FULL_SYSTEM
+    /** Check if this address is a valid instruction address.
+     *  Always returns true in this build.
+     */
+    bool validInstAddr(Addr addr) { return true; }
+
+    /** Check if this address is a valid data address.
+     *  Always returns true in this build.
+     */
+    bool validDataAddr(Addr addr) { return true; }
+
+    /** Get instruction asid. */
+    int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); }
+
+    /** Get data asid. */
+    int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); }
+#else
+    /** Check if this address is a valid instruction address; the process
+     *  decides.
+     */
+    bool validInstAddr(Addr addr)
+    { return process->validInstAddr(addr); }
+
+    /** Check if this address is a valid data address; the process decides. */
+    bool validDataAddr(Addr addr)
+    { return process->validDataAddr(addr); }
+
+    /** Instruction and data accesses share the single asid member. */
+    int getInstAsid() { return asid; }
+    int getDataAsid() { return asid; }
+
+#endif
+
+    //
+    // New accessors for new decoder.
+    //
+    uint64_t readIntReg(int reg_idx);
+
+    float readFloatRegSingle(int reg_idx);
+
+    double readFloatRegDouble(int reg_idx);
+
+    uint64_t readFloatRegInt(int reg_idx);
+
+    void setIntReg(int reg_idx, uint64_t val);
+
+    void setFloatRegSingle(int reg_idx, float val);
+
+    void setFloatRegDouble(int reg_idx, double val);
+
+    void setFloatRegInt(int reg_idx, uint64_t val);
+
+    uint64_t readPC();
+
+    void setNextPC(uint64_t val);
+
+    void setPC(Addr new_PC);
+
+    /** Function to add instruction onto the back of the list of
+     * in-flight instructions (the implementation uses push_back). Used
+     * when new instructions are fetched.
+     */
+    void addInst(DynInstPtr &inst);
+
+    /** Function to tell the CPU that an instruction has completed. */
+    void instDone();
+
+    /** Remove all instructions in back of the given instruction, but leave
+     * that instruction in the list. This is useful in a squash, when there
+     * are instructions in this list that don't exist in structures such as
+     * the ROB. The instruction doesn't have to be the last instruction in
+     * the list, but will be once this function completes.
+     * @todo: Remove only up until that inst? Squashed inst is most likely
+     * valid.
+ */ + void removeBackInst(DynInstPtr &inst); + + /** Remove an instruction from the front of the list. It is expected + * that there are no instructions in front of it (that is, none are older + * than the instruction being removed). Used when retiring instructions. + * @todo: Remove the argument to this function, and just have it remove + * last instruction once it's verified that commit has the same ordering + * as the instruction list. + */ + void removeFrontInst(DynInstPtr &inst); + + /** Remove all instructions that are not currently in the ROB. */ + void removeInstsNotInROB(); + + /** Remove all instructions younger than the given sequence number. */ + void removeInstsUntil(const InstSeqNum &seq_num); + + /** Remove all instructions from the list. */ + void removeAllInsts(); + + void dumpInsts(); + + /** Basically a wrapper function so that instructions executed at + * commit can tell the instruction queue that they have completed. + * Eventually this hack should be removed. + */ + void wakeDependents(DynInstPtr &inst); + + public: + /** List of all the instructions in flight. */ + list<DynInstPtr> instList; + + //not sure these should be private. + protected: + /** The fetch stage. */ + typename CPUPolicy::Fetch fetch; + + /** The fetch stage's status. */ + typename CPUPolicy::Fetch::Status fetchStatus; + + /** The decode stage. */ + typename CPUPolicy::Decode decode; + + /** The decode stage's status. */ + typename CPUPolicy::Decode::Status decodeStatus; + + /** The dispatch stage. */ + typename CPUPolicy::Rename rename; + + /** The dispatch stage's status. */ + typename CPUPolicy::Rename::Status renameStatus; + + /** The issue/execute/writeback stages. */ + typename CPUPolicy::IEW iew; + + /** The issue/execute/writeback stage's status. */ + typename CPUPolicy::IEW::Status iewStatus; + + /** The commit stage. */ + typename CPUPolicy::Commit commit; + + /** The fetch stage's status. 
*/
+    typename CPUPolicy::Commit::Status commitStatus;
+
+    //Might want to just pass these objects in to the constructors of the
+    //appropriate stage. regFile is in iew, freeList in dispatch, renameMap
+    //in dispatch, and the rob in commit.
+    /** The register file. */
+    typename CPUPolicy::RegFile regFile;
+
+    /** The free list. */
+    typename CPUPolicy::FreeList freeList;
+
+    /** The rename map. */
+    typename CPUPolicy::RenameMap renameMap;
+
+    /** The re-order buffer. */
+    typename CPUPolicy::ROB rob;
+
+  public:
+    /** Typedefs from the Impl to get the structs that each of the
+     * time buffers should use.
+     */
+    typedef typename CPUPolicy::TimeStruct TimeStruct;
+
+    typedef typename CPUPolicy::FetchStruct FetchStruct;
+
+    typedef typename CPUPolicy::DecodeStruct DecodeStruct;
+
+    typedef typename CPUPolicy::RenameStruct RenameStruct;
+
+    typedef typename CPUPolicy::IEWStruct IEWStruct;
+
+    /** The main time buffer to do backwards communication. */
+    TimeBuffer<TimeStruct> timeBuffer;
+
+    /** The fetch stage's instruction queue. */
+    TimeBuffer<FetchStruct> fetchQueue;
+
+    /** The decode stage's instruction queue. */
+    TimeBuffer<DecodeStruct> decodeQueue;
+
+    /** The rename stage's instruction queue. */
+    TimeBuffer<RenameStruct> renameQueue;
+
+    /** The IEW stage's instruction queue. */
+    TimeBuffer<IEWStruct> iewQueue;
+
+  public:
+    /** The temporary exec context to support older accessors. */
+    ExecContext *xc;
+
+    /** Temporary function to get pointer to exec context. */
+    ExecContext *xcBase() { return xc; }
+
+    /** Next instruction sequence number to hand out. */
+    InstSeqNum globalSeqNum;
+
+#ifdef FULL_SYSTEM
+    System *system;
+
+    MemoryController *memCtrl;
+    PhysicalMemory *physmem;
+
+    AlphaITB *itb;
+    AlphaDTB *dtb;
+
+//    SWContext *swCtx;
+#else
+    Process *process;
+
+    // Address space ID. Note that this is used for TIMING cache
+    // simulation only; all functional memory accesses should use
+    // the FunctionalMemory pointer (mem) declared after this block.
+    short asid;
+#endif // FULL_SYSTEM
+
+    /** Functional memory used by this CPU. */
+    FunctionalMemory *mem;
+
+    /** Instruction and data cache interfaces. */
+    MemInterface *icacheInterface;
+    MemInterface *dcacheInterface;
+
+    /** When set, init() skips registration and the register file copy. */
+    bool deferRegistration;
+
+    /** Count of completed instructions, incremented by instDone(). */
+    Counter numInsts;
+
+    Counter funcExeInst;
+};
+
+#endif // __SIMPLE_FULL_CPU_HH__
diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc
new file mode 100644
index 000000000..a90d64434
--- /dev/null
+++ b/cpu/beta_cpu/iew.cc
@@ -0,0 +1,7 @@
+
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
+#include "cpu/beta_cpu/iew_impl.hh"
+#include "cpu/beta_cpu/inst_queue.hh"
+
+template class SimpleIEW<AlphaSimpleImpl, AlphaSimpleImpl::CPUPol::IQ>; // explicit instantiation
diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh
new file mode 100644
index 000000000..e3e7c6db5
--- /dev/null
+++ b/cpu/beta_cpu/iew.hh
@@ -0,0 +1,190 @@
+//Todo: Update with statuses.
+//Need to handle delaying writes to the writeback bus if it's full at the
+//given time. Load store queue.
+
+#ifndef __CPU_BETA_CPU_SIMPLE_IEW_HH__
+#define __CPU_BETA_CPU_SIMPLE_IEW_HH__
+
+#include <queue>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/beta_cpu/comm.hh"
+
+//Can IEW even stall? Space should be available/allocated already...maybe
+//if there's not enough write ports on the ROB or waiting for CDB
+//arbitration.
+template<class Impl, class IQ> +class SimpleIEW +{ + private: + //Typedefs from Impl + typedef typename Impl::ISA ISA; + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::LDSTQ LDSTQ; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::IssueStruct IssueStruct; + + public: + enum Status { + Running, + Blocked, + Idle, + Squashing, + Unblocking + }; + + private: + Status _status; + Status _issueStatus; + Status _exeStatus; + Status _wbStatus; + + public: + void squash(); + + void squashDueToBranch(DynInstPtr &inst); + + void squashDueToMem(DynInstPtr &inst); + + void block(); + + inline void unblock(); + + public: + SimpleIEW(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void wakeDependents(DynInstPtr &inst); + + void tick(); + + void iew(); + + private: + void dispatchInsts(); + + void executeInsts(); + + //Interfaces to objects inside and outside of IEW. + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer<TimeStruct>::wire toRename; + + /** Rename instruction queue interface. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to get rename's output from rename queue. */ + typename TimeBuffer<RenameStruct>::wire fromRename; + + /** Issue stage queue. 
*/
+    TimeBuffer<IssueStruct> issueToExecQueue;
+
+    /** Wire to read information from the issue stage time queue. */
+    typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+    /**
+     * IEW stage time buffer. Holds ROB indices of instructions that
+     * can be marked as completed.
+     */
+    TimeBuffer<IEWStruct> *iewQueue;
+
+    /** Wire to write information heading to commit. */
+    typename TimeBuffer<IEWStruct>::wire toCommit;
+
+    //Will need internal queue to hold onto instructions coming from
+    //the rename stage in case of a stall.
+    /** Skid buffer between rename and IEW. */
+    std::queue<RenameStruct> skidBuffer;
+
+    /** Instruction queue. */
+    IQ instQueue;
+
+    /** Load/store queue. */
+    LDSTQ ldstQueue;
+
+    /** Pointer to rename map. Might not want this stage to directly
+     * access this though...
+     */
+    RenameMap *renameMap;
+
+    /** CPU interface. */
+    FullCPU *cpu;
+
+  private:
+    /** Commit to IEW delay, in ticks. */
+    unsigned commitToIEWDelay;
+
+    /** Rename to IEW delay, in ticks. */
+    unsigned renameToIEWDelay;
+
+    /**
+     * Issue to execute delay, in ticks. What this actually represents is
+     * the amount of time it takes for an instruction to wake up, be
+     * scheduled, and sent to a FU for execution.
+     */
+    unsigned issueToExecuteDelay;
+
+    /** Width of issue's read path, in instructions. The read path is both
+     * the skid buffer and the rename instruction queue.
+     * Note to self: is this really different than issueWidth?
+     */
+    unsigned issueReadWidth;
+
+    /** Width of issue, in instructions. */
+    unsigned issueWidth;
+
+    /** Width of execute, in instructions. Might make more sense to break
+     * down into FP vs int.
+     */
+    unsigned executeWidth;
+
+    /** Number of cycles stage has been squashing. Used so that the stage
+     * knows when it can start unblocking, which is when the previous stage
+     * has received the stall signal and clears up its outputs.
+     */
+    unsigned cyclesSquashing;
+
+    // Statistics; names and descriptions are registered in regStats().
+    Stats::Scalar<> iewIdleCycles;
+    Stats::Scalar<> iewSquashCycles;
+    Stats::Scalar<> iewBlockCycles;
+    Stats::Scalar<> iewUnblockCycles;
+//    Stats::Scalar<> iewWBInsts;
+    Stats::Scalar<> iewDispatchedInsts;
+    Stats::Scalar<> iewDispSquashedInsts;
+    Stats::Scalar<> iewDispLoadInsts;
+    Stats::Scalar<> iewDispStoreInsts;
+    Stats::Scalar<> iewDispNonSpecInsts;
+    Stats::Scalar<> iewIQFullEvents;
+    Stats::Scalar<> iewExecutedInsts;
+    Stats::Scalar<> iewExecLoadInsts;
+    Stats::Scalar<> iewExecStoreInsts;
+    Stats::Scalar<> iewExecSquashedInsts;
+    Stats::Scalar<> memOrderViolationEvents;
+    Stats::Scalar<> predictedTakenIncorrect;
+};
+
+#endif // __CPU_BETA_CPU_SIMPLE_IEW_HH__
diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh
new file mode 100644
index 000000000..b718e6aa0
--- /dev/null
+++ b/cpu/beta_cpu/iew_impl.hh
@@ -0,0 +1,665 @@
+// @todo: Fix the instantaneous communication among all the stages within
+// iew. There's a clear delay between issue and execute, yet backwards
+// communication happens simultaneously. Might not be that bad really...
+// it might skew stats a bit though. Issue would otherwise try to issue
+// instructions that would never be executed if there were a delay; without
+// it issue will simply squash. Make this stage block properly.
+// Update the statuses for each stage.
+// Actually read instructions out of the skid buffer.
+ +#include <queue> + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/iew.hh" + +template<class Impl, class IQ> +SimpleIEW<Impl, IQ>::SimpleIEW(Params ¶ms) + : // Just make this time buffer really big for now + issueToExecQueue(5, 5), + instQueue(params), + ldstQueue(params), + commitToIEWDelay(params.commitToIEWDelay), + renameToIEWDelay(params.renameToIEWDelay), + issueToExecuteDelay(params.issueToExecuteDelay), + issueReadWidth(params.issueWidth), + issueWidth(params.issueWidth), + executeWidth(params.executeWidth) +{ + DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth); + _status = Idle; + _issueStatus = Idle; + _exeStatus = Idle; + _wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute. + instQueue.setIssueToExecuteQueue(&issueToExecQueue); +} + +template <class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::regStats() +{ + instQueue.regStats(); + + iewIdleCycles + .name(name() + ".iewIdleCycles") + .desc("Number of cycles IEW is idle"); + + iewSquashCycles + .name(name() + ".iewSquashCycles") + .desc("Number of cycles IEW is squashing"); + + iewBlockCycles + .name(name() + ".iewBlockCycles") + .desc("Number of cycles IEW is blocking"); + + iewUnblockCycles + .name(name() + ".iewUnblockCycles") + .desc("Number of cycles IEW is unblocking"); + +// iewWBInsts; + + iewDispatchedInsts + .name(name() + ".iewDispatchedInsts") + .desc("Number of instructions dispatched to IQ"); + + iewDispSquashedInsts + .name(name() + ".iewDispSquashedInsts") + .desc("Number of squashed instructions skipped by dispatch"); + + iewDispLoadInsts + .name(name() + ".iewDispLoadInsts") + .desc("Number of dispatched load instructions"); + + iewDispStoreInsts + .name(name() + ".iewDispStoreInsts") + .desc("Number of dispatched store instructions"); + + iewDispNonSpecInsts + .name(name() + ".iewDispNonSpecInsts") + 
.desc("Number of dispatched non-speculative instructions"); + + iewIQFullEvents + .name(name() + ".iewIQFullEvents") + .desc("Number of times the IQ has become full, causing a stall"); + + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .name(name() + ".iewExecLoadInsts") + .desc("Number of load instructions executed"); + + iewExecStoreInsts + .name(name() + ".iewExecStoreInsts") + .desc("Number of store instructions executed"); + + iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + memOrderViolationEvents + .name(name() + ".memOrderViolationEvents") + .desc("Number of memory order violations"); + + predictedTakenIncorrect + .name(name() + ".predictedTakenIncorrect") + .desc("Number of branches that were predicted taken incorrectly"); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); + cpu = cpu_ptr; + + instQueue.setCPU(cpu_ptr); + ldstQueue.setCPU(cpu_ptr); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToIEWDelay); + + // Setup wire to write information back to previous stages. + toRename = timeBuffer->getWire(0); + + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to read information from rename queue. 
+ fromRename = renameQueue->getWire(-renameToIEWDelay); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) +{ + DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to write instructions to commit. + toCommit = iewQueue->getWire(0); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::wakeDependents(DynInstPtr &inst) +{ + instQueue.wakeDependents(inst); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::block() +{ + DPRINTF(IEW, "IEW: Blocking.\n"); + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromRename); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template<class Impl, class IQ> +inline void +SimpleIEW<Impl, IQ>::unblock() +{ + // Check if there's information in the skid buffer. If there is, then + // set status to unblocking, otherwise set it directly to running. + DPRINTF(IEW, "IEW: Reading instructions out of the skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. 
+ if (!skidBuffer.empty()) { + toRename->iewInfo.stall = true; + } else { + DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); + _status = Running; + } +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::squash() +{ + DPRINTF(IEW, "IEW: Squashing all instructions.\n"); + _status = Squashing; + + // Tell the IQ to start squashing. + instQueue.squash(); + + // Tell the LDSTQ to start squashing. + ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::squashDueToBranch(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->mispredPC = inst->readPC(); + toCommit->nextPC = inst->readCalcTarg(); + toCommit->branchMispredict = true; + // Prediction was incorrect, so send back inverse. + toCommit->branchTaken = inst->readCalcTarg() != + (inst->readPC() + sizeof(MachInst)); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::squashDueToMem(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->nextPC = inst->readCalcTarg(); +} + +template <class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::dispatchInsts() +{ + //////////////////////////////////////// + // DISPATCH/ISSUE stage + //////////////////////////////////////// + + //Put into its own function? 
+ //Add instructions to IQ if there are any instructions there + + // Check if there are any instructions coming from rename, and we're. + // not squashing. + if (fromRename->size > 0) { + int insts_to_add = fromRename->size; + + // Loop through the instructions, putting them in the instruction + // queue. + for (int inst_num = 0; inst_num < insts_to_add; ++inst_num) + { + DynInstPtr inst = fromRename->insts[inst_num]; + + // Make sure there's a valid instruction there. + assert(inst); + + DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", + inst->readPC()); + + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. + if (inst->isSquashed()) { + ++iewDispSquashedInsts; + continue; + } else if (instQueue.isFull()) { + DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); + // Call function to start blocking. + block(); + // Tell previous stage to stall. + toRename->iewInfo.stall = true; + + ++iewIQFullEvents; + break; + } else if (inst->isLoad()) { + DPRINTF(IEW, "IEW: Issue: Memory instruction " + "encountered, adding to LDSTQ.\n"); + + // Reserve a spot in the load store queue for this + // memory access. + ldstQueue.insertLoad(inst); + + ++iewDispLoadInsts; + } else if (inst->isStore()) { + ldstQueue.insertStore(inst); + + // A bit of a hack. Set that it can commit so that + // the commit stage will try committing it, and then + // once commit realizes it's a store it will send back + // a signal to this stage to issue and execute that + // store. Change to be a bit that says the instruction + // has extra work to do at commit. + inst->setCanCommit(); + + instQueue.insertNonSpec(inst); + + ++iewDispStoreInsts; + ++iewDispNonSpecInsts; + + continue; + } else if (inst->isNonSpeculative()) { + DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " + "encountered, skipping.\n"); + + // Same hack as with stores. 
+ inst->setCanCommit(); + + // Specificall insert it as nonspeculative. + instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + continue; + } else if (inst->isNop()) { + DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " + ", skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } else if (inst->isExecuted()) { + assert(0 && "Instruction shouldn't be executed.\n"); + DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " + "skipping.\n"); + +// assert(inst->isDirectCtrl()); + + inst->setIssued(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } + + // If the instruction queue is not full, then add the + // instruction. + instQueue.insert(fromRename->insts[inst_num]); + + ++iewDispatchedInsts; + } + } +} + +template <class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::executeInsts() +{ + //////////////////////////////////////// + //EXECUTE/WRITEBACK stage + //////////////////////////////////////// + + //Put into its own function? + //Similarly should probably have separate execution for int vs FP. + // Above comment is handled by the issue queue only issuing a valid + // mix of int/fp instructions. + //Actually okay to just have one execution, buuuuuut will need + //somewhere that defines the execution latency of all instructions. + // @todo: Move to the FU pool used in the current full cpu. + + int fu_usage = 0; + bool fetch_redirect = false; + + // Execute/writeback any instructions that are available. + for (int inst_num = 0; + fu_usage < executeWidth && /* Haven't exceeded available FU's. */ + inst_num < issueWidth && + fromIssue->insts[inst_num]; + ++inst_num) { + + DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); + + // Get instruction from issue's queue. 
+ DynInstPtr inst = fromIssue->insts[inst_num]; + + DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + toCommit->insts[inst_num] = inst; + + ++iewExecSquashedInsts; + + continue; + } + + inst->setExecuted(); + + // If an instruction is executed, then count it towards FU usage. + ++fu_usage; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + ldstQueue.executeLoad(inst); + + ++iewExecLoadInsts; + } else if (inst->isStore()) { + ldstQueue.executeStore(); + + ++iewExecStoreInsts; + } else { + panic("IEW: Unexpected memory type!\n"); + } + + } else { + inst->execute(); + + ++iewExecutedInsts; + } + + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + // Add finished instruction to queue to commit. + toCommit->insts[inst_num] = inst; + + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. 
+ // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. + if (!fetch_redirect) { + if (inst->mispredicted()) { + fetch_redirect = true; + + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { + predictedTakenIncorrect++; + } + } else if (ldstQueue.violation()) { + fetch_redirect = true; + + // Get the DynInst that caused the violation. + DynInstPtr violator = ldstQueue.getMemDepViolator(); + + DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. + instQueue.violation(inst, violator); + + // Squash. + squashDueToMem(inst); + + ++memOrderViolationEvents; + } + } + } +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::tick() +{ + // Considering putting all the state-determining stuff in this section. + + // Try to fill up issue queue with as many instructions as bandwidth + // allows. + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + + // Check if the stage is in a running status. + if (_status != Blocked && _status != Squashing) { + DPRINTF(IEW, "IEW: Status is not blocked, attempting to run " + "stage.\n"); + iew(); + + // If it's currently unblocking, check to see if it should switch + // to running. + if (_status == Unblocking) { + unblock(); + + ++iewUnblockCycles; + } + } else if (_status == Squashing) { + + DPRINTF(IEW, "IEW: Still squashing.\n"); + + // Check if stage should remain squashing. Stop squashing if the + // squash signal clears. 
+ if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + DPRINTF(IEW, "IEW: Done squashing, changing status to " + "running.\n"); + + _status = Running; + instQueue.stopSquash(); + } else { + instQueue.doSquash(); + } + + ++iewSquashCycles; + + // Also should advance its own time buffers if the stage ran. + // Not sure about this... +// issueToExecQueue.advance(); + } else if (_status == Blocked) { + // Continue to tell previous stage to stall. + toRename->iewInfo.stall = true; + + // Check if possible stall conditions have cleared. + if (!fromCommit->commitInfo.stall && + !instQueue.isFull()) { + DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n"); + _status = Unblocking; + } + + // If there's still instructions coming from rename, continue to + // put them on the skid buffer. + if (fromRename->size == 0) { + block(); + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + + ++iewBlockCycles; + } + + // @todo: Maybe put these at the beginning, so if it's idle it can + // return early. + // Write back number of free IQ entries here. + toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + if (fromCommit->commitInfo.commitIsStore) { + ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); + } else if (fromCommit->commitInfo.commitIsLoad) { + ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); + } + + if (fromCommit->commitInfo.nonSpecSeqNum != 0) { + instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); + } + + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", + instQueue.numFreeEntries()); +} + +template<class Impl, class IQ> +void +SimpleIEW<Impl, IQ>::iew() +{ + // Might want to put all state checks in the tick() function. + // Check if being told to stall from commit. 
+ if (fromCommit->commitInfo.stall) { + block(); + return; + } else if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + // Also check if commit is telling this stage to squash. + squash(); + return; + } + + dispatchInsts(); + + // Have the instruction queue try to schedule any ready instructions. + instQueue.scheduleReadyInsts(); + + executeInsts(); + + // Loop through the head of the time buffer and wake any dependents. + // These instructions are about to write back. In the simple model + // this loop can really happen within the previous loop, but when + // instructions have actual latencies, this loop must be separate. + // Also mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to rename map, or have this as + // part of backwards communication. + for (int inst_num = 0; inst_num < issueWidth && + toCommit->insts[inst_num]; inst_num++) + { + DynInstPtr inst = toCommit->insts[inst_num]; + + DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", + inst->readPC()); + + if(!inst->isSquashed()) { + instQueue.wakeDependents(inst); + + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } + } + } + + // Also should advance its own time buffers if the stage ran. + // Not the best place for it, but this works (hopefully). + issueToExecQueue.advance(); +} diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc new file mode 100644 index 000000000..c4fd077bc --- /dev/null +++ b/cpu/beta_cpu/inst_queue.cc @@ -0,0 +1,10 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. 
+template InstructionQueue<AlphaSimpleImpl>; + +unsigned +InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0; diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh new file mode 100644 index 000000000..6fcce70a4 --- /dev/null +++ b/cpu/beta_cpu/inst_queue.hh @@ -0,0 +1,308 @@ +#ifndef __INST_QUEUE_HH__ +#define __INST_QUEUE_HH__ + +#include <list> +#include <map> +#include <queue> +#include <stdint.h> +#include <vector> + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" + +/** + * A standard instruction queue class. It holds instructions in an + * array, holds the ordering of the instructions within a linked list, + * and tracks producer/consumer dependencies within a separate linked + * list. Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. + */ +template <class Impl> +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; + + // Typedef of iterator through the list of instructions. Might be + // better to untie this from the FullCPU or pass its information to + // the stages. + typedef typename std::list<DynInstPtr>::iterator ListIt; + + /** + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. 
+ */ + struct pqCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + /** + * Struct for comparing entries to be added to the set. This gives + * standard ordering in terms of sequence numbers. + */ + struct setCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum < rhs->seqNum; + } + }; + + typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare> + ReadyInstQueue; + + InstructionQueue(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu); + + void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + unsigned numFreeEntries(); + + bool isFull(); + + void insert(DynInstPtr &new_inst); + + void insertNonSpec(DynInstPtr &new_inst); + + void advanceTail(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &inst); + + void wakeDependents(DynInstPtr &completed_inst); + + void violation(DynInstPtr &store, DynInstPtr &faulting_load); + + // Change this to take in the sequence number + void squash(); + + void doSquash(); + + void stopSquash(); + + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + + private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit; + + /** The queue to the execute stage. Issued instructions will be written + * into it. 
+ */ + TimeBuffer<IssueStruct> *issueToExecuteQueue; + + /** The backwards time buffer. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + enum InstList { + Int, + Float, + Branch, + Memory, + Misc, + Squashed, + None + }; + + /** List of ready int instructions. Used to keep track of the order in + * which instructions should issue. + */ + ReadyInstQueue readyIntInsts; + + /** List of ready floating point instructions. */ + ReadyInstQueue readyFloatInsts; + + /** List of ready branch instructions. */ + ReadyInstQueue readyBranchInsts; + + /** List of ready memory instructions. */ +// ReadyInstQueue readyMemInsts; + + /** List of ready miscellaneous instructions. */ + ReadyInstQueue readyMiscInsts; + + /** List of squashed instructions (which are still valid and in IQ). + * Implemented using a priority queue; the entries must contain both + * the IQ index and sequence number of each instruction so that + * ordering based on sequence numbers can be used. + */ + ReadyInstQueue squashedInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is necessary to be + * able to search by the sequence number alone. + */ + std::map<InstSeqNum, DynInstPtr> nonSpecInsts; + + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The number of integer instructions that can be issued in one + * cycle. + */ + unsigned intWidth; + + /** The number of floating point instructions that can be issued + * in one cycle. 
+ */ + unsigned floatWidth; + + /** The number of branches that can be issued in one cycle. */ + unsigned branchWidth; + + /** The number of memory instructions that can be issued in one cycle. */ + unsigned memoryWidth; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + + //The number of physical registers in the CPU. + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Iterator that points to the youngest instruction in the IQ. */ + ListIt tail; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt; + + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. So for now it will sit here, within the IQ, until + //a better implementation is decided upon. + // This function probably shouldn't be within the entry... 
+ void insert(DynInstPtr &new_inst); + + void remove(DynInstPtr &inst_to_remove); + + // Debug variable, remove when done testing. + static unsigned mem_alloc_counter; + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector<bool> regScoreboard; + + bool addToDependents(DynInstPtr &new_inst); + void insertDependency(DynInstPtr &new_inst); + void createDependency(DynInstPtr &new_inst); + void dumpDependGraph(); + + void addIfReady(DynInstPtr &inst); + + Stats::Scalar<> iqInstsAdded; + Stats::Scalar<> iqNonSpecInstsAdded; +// Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqIntInstsIssued; +// Stats::Scalar<> iqFloatInstsAdded; + Stats::Scalar<> iqFloatInstsIssued; +// Stats::Scalar<> iqBranchInstsAdded; + Stats::Scalar<> iqBranchInstsIssued; +// Stats::Scalar<> iqMemInstsAdded; + Stats::Scalar<> iqMemInstsIssued; +// Stats::Scalar<> iqMiscInstsAdded; + Stats::Scalar<> iqMiscInstsIssued; + Stats::Scalar<> iqSquashedInstsIssued; + Stats::Scalar<> iqLoopSquashStalls; + Stats::Scalar<> iqSquashedInstsExamined; + Stats::Scalar<> iqSquashedOperandsExamined; + Stats::Scalar<> iqSquashedNonSpecRemoved; + +}; + +#endif //__INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh new file mode 100644 index 000000000..c688181ed --- /dev/null +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -0,0 +1,1085 @@ +#ifndef 
__INST_QUEUE_IMPL_HH__ +#define __INST_QUEUE_IMPL_HH__ + +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleRreadyInsts(), gets put onto the +// normal ready queue. This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards. + +#include <vector> + +#include "sim/universe.hh" +#include "cpu/beta_cpu/inst_queue.hh" + +// Either compile error or max int due to sign extension. +// Blatant hack to avoid compile warnings. +const InstSeqNum MaxInstSeqNum = 0 - 1; + +template <class Impl> +InstructionQueue<Impl>::InstructionQueue(Params ¶ms) + : memDepUnit(params), + numEntries(params.numIQEntries), + intWidth(params.executeIntWidth), + floatWidth(params.executeFloatWidth), + branchWidth(params.executeBranchWidth), + memoryWidth(params.executeMemoryWidth), + totalWidth(params.issueWidth), + numPhysIntRegs(params.numPhysIntRegs), + numPhysFloatRegs(params.numPhysFloatRegs), + commitToIEWDelay(params.commitToIEWDelay) +{ + DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); + + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + + DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. + dependGraph = new DependencyEntry[numPhysRegs]; + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + + // Initialize all the head pointers to point to NULL, and all the + // entries as unready. + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. 
However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. + for (int i = 0; i < numPhysRegs; ++i) + { + dependGraph[i].next = NULL; + dependGraph[i].inst = NULL; + regScoreboard[i] = false; + } + +} + +template <class Impl> +void +InstructionQueue<Impl>::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqLoopSquashStalls + .name(name() + ".iqLoopSquashStalls") + .desc("Number of times issue loop had to restart due to squashed " + "inst; mainly for profiling") + .prereq(iqLoopSquashStalls); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions 
iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); + + // Tell mem dependence unit to reg stats as well. + memDepUnit.regStats(); +} + +template <class Impl> +void +InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); +} + +template <class Impl> +void +InstructionQueue<Impl>::setIssueToExecuteQueue( + TimeBuffer<IssueStruct> *i2e_ptr) +{ + DPRINTF(IQ, "IQ: Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} + +template <class Impl> +void +InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(IQ, "IQ: Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template <class Impl> +bool +InstructionQueue<Impl>::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template <class Impl> +unsigned +InstructionQueue<Impl>::numFreeEntries() +{ + return freeEntries; +} + +template <class Impl> +void +InstructionQueue<Impl>::insert(DynInstPtr &new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. 
+ assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != new_inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (new_inst->isMemRef()) { + memDepUnit.insert(new_inst); + // Uh..forgot to look it up and put it on the proper dependency list + // if the instruction should not go yet. + } else { + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); + } + + ++iqInstsAdded; + + assert(freeEntries == (numEntries - countInsts())); +} + +template <class Impl> +void +InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst) +{ + nonSpecInsts[inst->seqNum] = inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. 
+ assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. +// addToDependents(inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (inst->isMemRef()) { + memDepUnit.insertNonSpec(inst); + } + + ++iqNonSpecInstsAdded; +} + +// Slightly hack function to advance the tail iterator in the case that +// the IEW stage issues an instruction that is not added to the IQ. This +// is needed in case a long chain of such instructions occurs. +// I don't think this is used anymore. +template <class Impl> +void +InstructionQueue<Impl>::advanceTail(DynInstPtr &inst) +{ + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. 
+ if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + assert(freeEntries <= numEntries); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +// Need to make sure the number of float and integer instructions +// issued does not exceed the total issue bandwidth. +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template <class Impl> +void +InstructionQueue<Impl>::scheduleReadyInsts() +{ + DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from " + "the IQ.\n"); + + int int_issued = 0; + int float_issued = 0; + int branch_issued = 0; + int memory_issued = 0; + int squashed_issued = 0; + int total_issued = 0; + + IssueStruct *i2e_info = issueToExecuteQueue->access(0); + + bool insts_available = !readyBranchInsts.empty() || + !readyIntInsts.empty() || + !readyFloatInsts.empty() || + !memDepUnit.empty() || + !readyMiscInsts.empty() || + !squashedInsts.empty(); + + // Note: Requires a globally defined constant. + InstSeqNum oldest_inst = MaxInstSeqNum; + InstList list_with_oldest = None; + + // Temporary values. + DynInstPtr int_head_inst; + DynInstPtr float_head_inst; + DynInstPtr branch_head_inst; + DynInstPtr mem_head_inst; + DynInstPtr misc_head_inst; + DynInstPtr squashed_head_inst; + + // Somewhat nasty code to look at all of the lists where issuable + // instructions are located, and choose the oldest instruction among + // those lists. Consider a rewrite in the future. + while (insts_available && total_issued < totalWidth) + { + // Set this to false. 
Each if-block is required to set it to true + // if there were instructions available this check. This will cause + // this loop to run once more than necessary, but avoids extra calls. + insts_available = false; + + oldest_inst = MaxInstSeqNum; + + list_with_oldest = None; + + if (!readyIntInsts.empty() && + int_issued < intWidth) { + + insts_available = true; + + int_head_inst = readyIntInsts.top(); + + if (int_head_inst->isSquashed()) { + readyIntInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } + + oldest_inst = int_head_inst->seqNum; + + list_with_oldest = Int; + } + + if (!readyFloatInsts.empty() && + float_issued < floatWidth) { + + insts_available = true; + + float_head_inst = readyFloatInsts.top(); + + if (float_head_inst->isSquashed()) { + readyFloatInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (float_head_inst->seqNum < oldest_inst) { + oldest_inst = float_head_inst->seqNum; + + list_with_oldest = Float; + } + } + + if (!readyBranchInsts.empty() && + branch_issued < branchWidth) { + + insts_available = true; + + branch_head_inst = readyBranchInsts.top(); + + if (branch_head_inst->isSquashed()) { + readyBranchInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (branch_head_inst->seqNum < oldest_inst) { + oldest_inst = branch_head_inst->seqNum; + + list_with_oldest = Branch; + } + + } + + if (!memDepUnit.empty() && + memory_issued < memoryWidth) { + + insts_available = true; + + mem_head_inst = memDepUnit.top(); + + if (mem_head_inst->isSquashed()) { + memDepUnit.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (mem_head_inst->seqNum < oldest_inst) { + oldest_inst = mem_head_inst->seqNum; + + list_with_oldest = Memory; + } + } + + if (!readyMiscInsts.empty()) { + + insts_available = true; + + misc_head_inst = readyMiscInsts.top(); + + if (misc_head_inst->isSquashed()) { + readyMiscInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (misc_head_inst->seqNum < oldest_inst) { + oldest_inst = 
misc_head_inst->seqNum; + + list_with_oldest = Misc; + } + } + + if (!squashedInsts.empty()) { + + insts_available = true; + + squashed_head_inst = squashedInsts.top(); + + if (squashed_head_inst->seqNum < oldest_inst) { + list_with_oldest = Squashed; + } + + } + + DynInstPtr issuing_inst = NULL; + + switch (list_with_oldest) { + case None: + DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " + "inst is %#x.\n", issuing_inst); + break; + + case Int: + issuing_inst = int_head_inst; + readyIntInsts.pop(); + ++int_issued; + DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Float: + issuing_inst = float_head_inst; + readyFloatInsts.pop(); + ++float_issued; + DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Branch: + issuing_inst = branch_head_inst; + readyBranchInsts.pop(); + ++branch_issued; + DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Memory: + issuing_inst = mem_head_inst; + + memDepUnit.pop(); + ++memory_issued; + DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Misc: + issuing_inst = misc_head_inst; + readyMiscInsts.pop(); + + ++iqMiscInstsIssued; + + DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Squashed: + issuing_inst = squashed_head_inst; + squashedInsts.pop(); + ++squashed_issued; + DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", + issuing_inst->readPC()); + break; + } + + if (list_with_oldest != None) { + i2e_info->insts[total_issued] = issuing_inst; + i2e_info->size++; + + issuing_inst->setIssued(); + + ++freeEntries; + ++total_issued; + } + + assert(freeEntries == (numEntries - countInsts())); + } + + iqIntInstsIssued += int_issued; + iqFloatInstsIssued += float_issued; + iqBranchInstsIssued += branch_issued; + iqMemInstsIssued += memory_issued; + 
iqSquashedInstsIssued += squashed_issued; +} + +template <class Impl> +void +InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + + non_spec_it_t inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); + + // Now schedule the instruction. + if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); + } else { + memDepUnit.nonSpecInstReady((*inst_it).second); + } + + nonSpecInsts.erase(inst_it); +} + +template <class Impl> +void +InstructionQueue<Impl>::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit.violation(store, faulting_load); +} + +template <class Impl> +void +InstructionQueue<Impl>::squash() +{ + DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n"); + + // Read instruction sequence number of last instruction out of the + // time buffer. + squashedSeqNum = fromCommit->commitInfo.doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt = tail; + + // Call doSquash. + doSquash(); + + // Also tell the memory dependence unit to squash. + memDepUnit.squash(squashedSeqNum); +} + +template <class Impl> +void +InstructionQueue<Impl>::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInstPtr squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. 
+ if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + + // Remove the instruction from the dependency list. + // Hack for now: These below don't add themselves to the + // dependency list, so don't try to remove them. + if (!squashed_inst->isNonSpeculative() && + !squashed_inst->isStore()) { + int8_t total_src_regs = squashed_inst->numSrcRegs(); + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + + ++iqSquashedOperandsExamined; + } + + // Might want to remove producers as well. + } else { + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + + squashedInsts.push(squashed_inst); + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + + --squashIt; + ++iqSquashedInstsExamined; + } +} + +template <class Impl> +void +InstructionQueue<Impl>::stopSquash() +{ + // Clear up the squash variables to ensure that squashing doesn't + // get called improperly. + squashedSeqNum = 0; + + squashIt = cpu->instList.end(); +} + +template <class Impl> +void +InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); + //Look at the physical destination register of the DynInst + //and look it up on the dependency graph. 
Then mark as ready + //any instructions within the instruction queue. + int8_t total_dest_regs = completed_inst->numDestRegs(); + + DependencyEntry *curr; + + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. + + if (completed_inst->isMemRef()) { + memDepUnit.wakeDependents(completed_inst); + } + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle thie. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + while (dependGraph[dest_reg].next) { + + curr = dependGraph[dest_reg].next; + + DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + dependGraph[dest_reg].next = curr->next; + + DependencyEntry::mem_alloc_counter--; + + delete curr; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. 
+ regScoreboard[dest_reg] = true; + } +} + +template <class Impl> +bool +InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template <class Impl> +void +InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. 
+ int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + + dependGraph[dest_reg].inst = new_inst; + + assert(!dependGraph[dest_reg].next); + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} + +template <class Impl> +void +InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; + + ++mem_alloc_counter; +} + +template <class Impl> +void +InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while(curr->inst != inst_to_remove) + { + prev = curr; + curr = curr->next; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. 
+ prev->next = curr->next; + + --mem_alloc_counter; + + delete curr; +} + +template <class Impl> +void +InstructionQueue<Impl>::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x consumer: ", i, + curr->inst->readPC()); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x ", curr->inst->readPC()); + } + + cprintf("\n"); + } +} + +template <class Impl> +void +InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isControl()) { + + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyBranchInsts.push(inst); + + } else if (inst->isMemRef()) { + + DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. + + memDepUnit.regsReady(inst); + +#if 0 + if (memDepUnit.readyToIssue(inst)) { + DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyMemInsts.push(inst); + } else { + // Make dependent on the store. + // Will need some way to get the store instruction it should + // be dependent upon; then when the store issues it can + // put the instruction on the ready list. + // Yet another tree? 
+ assert(0 && "Instruction has no way to actually issue"); + } +#endif + + } else if (inst->isInteger()) { + + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(inst); + + } else if (inst->isFloating()) { + + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(inst); + + } else { + DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, " + "putting it onto the ready list, PC %#x..\n", + inst->readPC()); + + readyMiscInsts.push(inst); + } + } +} + +template <class Impl> +int +InstructionQueue<Impl>::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + ++count_it; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. + if (count_it != cpu->instList.end() && + (*count_it) && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template <class Impl> +void +InstructionQueue<Impl>::dumpLists() +{ + cprintf("Ready integer list size: %i\n", readyIntInsts.size()); + + cprintf("Ready float list size: %i\n", readyFloatInsts.size()); + + cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); + +// cprintf("Ready memory list size: %i\n", readyMemInsts.size()); + + cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); + + cprintf("Squashed list size: %i\n", squashedInsts.size()); + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + non_spec_it_t non_spec_it = nonSpecInsts.begin(); + + cprintf("Non speculative list: "); + + while (non_spec_it != nonSpecInsts.end()) { + cprintf("%#x ", (*non_spec_it).second->readPC()); + ++non_spec_it; + } + + cprintf("\n"); + +} + +#endif // __INST_QUEUE_IMPL_HH__ diff --git a/cpu/beta_cpu/mem_dep_unit.cc 
b/cpu/beta_cpu/mem_dep_unit.cc new file mode 100644 index 000000000..3175997f6 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit.cc @@ -0,0 +1,9 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/store_set.hh" +#include "cpu/beta_cpu/mem_dep_unit_impl.hh" + +// Force instantiation of memory dependency unit using store sets and +// AlphaSimpleImpl. +template MemDepUnit<StoreSet, AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh new file mode 100644 index 000000000..e43543e09 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit.hh @@ -0,0 +1,140 @@ + +#ifndef __MEM_DEP_UNIT_HH__ +#define __MEM_DEP_UNIT_HH__ + +#include <set> +#include <map> + +#include "cpu/inst_seq.hh" +#include "base/statistics.hh" + +/** + * Memory dependency unit class. This holds the memory dependence predictor. + * As memory operations are issued to the IQ, they are also issued to this + * unit, which then looks up the prediction as to what they are dependent + * upon. This unit must be checked prior to a memory operation being able + * to issue. Although this is templated, it's somewhat hard to make a generic + * memory dependence unit. This one is mostly for store sets; it will be + * quite limited in what other memory dependence predictions it can also + * utilize. Thus this class should most likely be rewritten for other + * dependence prediction schemes. 
+ */ +template <class MemDepPred, class Impl> +class MemDepUnit { + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + MemDepUnit(Params &params); + + void regStats(); + + void insert(DynInstPtr &inst); + + void insertNonSpec(DynInstPtr &inst); + + void regsReady(DynInstPtr &inst); + + void nonSpecInstReady(DynInstPtr &inst); + + void issue(DynInstPtr &inst); + + void wakeDependents(DynInstPtr &inst); + + void squash(const InstSeqNum &squashed_num); + + void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + + // Will want to make this operation relatively fast. Right now it + // kind of sucks. + DynInstPtr &top(); + + void pop(); + + inline bool empty() + { return readyInsts.empty(); } + + private: + typedef typename std::set<InstSeqNum>::iterator sn_it_t; + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator dyn_it_t; + + // Forward declarations so that the following two typedefs work. + class Dependency; + class ltDependency; + + typedef typename std::set<Dependency, ltDependency>::iterator dep_it_t; + typedef typename std::map<InstSeqNum, vector<dep_it_t> >::iterator + sd_it_t; + + struct Dependency { + Dependency(const InstSeqNum &_seqNum) + : seqNum(_seqNum), regsReady(0), memDepReady(0) + { } + + Dependency(const InstSeqNum &_seqNum, bool _regsReady, + bool _memDepReady) + : seqNum(_seqNum), regsReady(_regsReady), + memDepReady(_memDepReady) + { } + + InstSeqNum seqNum; + mutable bool regsReady; + mutable bool memDepReady; + mutable sd_it_t storeDep; + }; + + struct ltDependency { + bool operator() (const Dependency &lhs, const Dependency &rhs) + { + return lhs.seqNum < rhs.seqNum; + } + }; + + + private: + inline void moveToReady(dep_it_t &woken_inst); + + private: + /** List of instructions that have passed through rename, yet are still + * waiting on either a memory dependence to resolve or source registers to + * become available before they can issue. 
+ */ + std::set<Dependency, ltDependency> waitingInsts; + + /** List of instructions that have all their predicted memory dependences + * resolved and their source registers ready. + */ + std::set<InstSeqNum> readyInsts; + + // Change this to hold a vector of iterators, which will point to the + // entry of the waiting instructions. + /** List of stores' sequence numbers, each of which has a vector of + * iterators. The iterators point to the appropriate node within + * waitingInsts that has the dependent instruction. + */ + std::map<InstSeqNum, vector<dep_it_t> > storeDependents; + + // For now will implement this as a map...hash table might not be too + // bad, or could move to something that mimics the current dependency + // graph. + std::map<InstSeqNum, DynInstPtr> memInsts; + + // Iterator pointer to the top instruction which is ready. + // Is set by the top() call. + dyn_it_t topInst; + + /** The memory dependence predictor. It is accessed upon new + * instructions being added to the IQ, and responds by telling + * this unit what instruction the newly added instruction is dependent + * upon. 
+ */ + MemDepPred depPred; + + Stats::Scalar<> insertedLoads; + Stats::Scalar<> insertedStores; + Stats::Scalar<> conflictingLoads; + Stats::Scalar<> conflictingStores; +}; + +#endif diff --git a/cpu/beta_cpu/mem_dep_unit_impl.hh b/cpu/beta_cpu/mem_dep_unit_impl.hh new file mode 100644 index 000000000..4161ac2a8 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit_impl.hh @@ -0,0 +1,392 @@ + +#include <map> + +#include "cpu/beta_cpu/mem_dep_unit.hh" + +template <class MemDepPred, class Impl> +MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params &params) + : depPred(params.SSITSize, params.LFSTSize) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::regStats() +{ + insertedLoads + .name(name() + ".memDep.insertedLoads") + .desc("Number of loads inserted to the mem dependence unit."); + + insertedStores + .name(name() + ".memDep.insertedStores") + .desc("Number of stores inserted to the mem dependence unit."); + + conflictingLoads + .name(name() + ".memDep.conflictingLoads") + .desc("Number of conflicting loads."); + + conflictingStores + .name(name() + ".memDep.conflictingStores") + .desc("Number of conflicting stores."); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency unresolved_dependencies(inst_seq_num); + + InstSeqNum producing_store = depPred.checkInst(inst->readPC()); + + if (producing_store == 0 || + storeDependents.find(producing_store) == storeDependents.end()) { + + DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC " + "%#x.\n", inst->readPC()); + + unresolved_dependencies.storeDep = storeDependents.end(); + + if (inst->readyToIssue()) { + readyInsts.insert(inst_seq_num); + } else { + unresolved_dependencies.memDepReady = true; + + waitingInsts.insert(unresolved_dependencies); + } + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Adding to 
dependency list; " + "inst PC %#x is dependent on seq num %i.\n", + inst->readPC(), producing_store); + + if (inst->readyToIssue()) { + unresolved_dependencies.regsReady = true; + } + + // Find the store that this instruction is dependent on. + sd_it_t store_loc = storeDependents.find(producing_store); + + assert(store_loc != storeDependents.end()); + + // Record the location of the store that this instruction is + // dependent on. + unresolved_dependencies.storeDep = store_loc; + + // If it's not already ready, then add it to the renamed + // list and the dependencies. + dep_it_t inst_loc = + (waitingInsts.insert(unresolved_dependencies)).first; + + // Add this instruction to the list of dependents. + (*store_loc).second.push_back(inst_loc); + + assert(!(*store_loc).second.empty()); + + if (inst->isLoad()) { + ++conflictingLoads; + } else { + ++conflictingStores; + } + } + + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency non_spec_inst(inst_seq_num); + + non_spec_inst.storeDep = storeDependents.end(); + + waitingInsts.insert(non_spec_inst); + + // Might want to turn this part into an inline function or something. 
+ // It's shared between both insert functions. + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template <class MemDepPred, class Impl> +typename Impl::DynInstPtr & +MemDepUnit<MemDepPred, Impl>::top() +{ + topInst = memInsts.find( (*readyInsts.begin()) ); + + DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n", + (*topInst).second->readPC()); + + return (*topInst).second; +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::pop() +{ + DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n", + (*topInst).second->readPC()); + + wakeDependents((*topInst).second); + + issue((*topInst).second); + + memInsts.erase(topInst); + + topInst = memInsts.end(); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for " + "instruction PC %#x.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + if ((*waiting_inst).memDepReady) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory " + "dependencies resolved, adding it to the ready list.\n"); + + 
moveToReady(waiting_inst); + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on " + "memory dependency.\n"); + + (*waiting_inst).regsReady = true; + } +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative " + "instruction PC %#x as ready.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + moveToReady(waiting_inst); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst) +{ + assert(readyInsts.find(inst->seqNum) != readyInsts.end()); + + DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n", + inst->readPC()); + + // Remove the instruction from the ready list. + readyInsts.erase(inst->seqNum); + + depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst) +{ + // Only stores have dependents. + if (!inst->isStore()) { + return; + } + + // Wake any dependencies. + sd_it_t sd_it = storeDependents.find(inst->seqNum); + + // If there's no entry, then return. Really there should only be + // no entry if the instruction is a load. 
+ if (sd_it == storeDependents.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence " + "number %i has no dependents.\n", + inst->readPC(), inst->seqNum); + + return; + } + + for (int i = 0; i < (*sd_it).second.size(); ++i ) { + dep_it_t woken_inst = (*sd_it).second[i]; + + DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, " + "sequence number %i.\n", + (*woken_inst).seqNum); +#if 0 + // Should we have reached instructions that are actually squashed, + // there will be no more useful instructions in this dependency + // list. Break out early. + if (waitingInsts.find(woken_inst) == waitingInsts.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " + "are squashed, starting at SN %i. Breaking early.\n", + inst->readPC(), woken_inst); + break; + } +#endif + + if ((*woken_inst).regsReady) { + moveToReady(woken_inst); + } else { + (*woken_inst).memDepReady = true; + } + } + + storeDependents.erase(sd_it); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num) +{ + + if (!waitingInsts.empty()) { + dep_it_t waiting_it = waitingInsts.end(); + + --waiting_it; + + // Remove entries from the renamed list as long as we haven't reached + // the end and the entries continue to be younger than the squashed. + while (!waitingInsts.empty() && + (*waiting_it).seqNum > squashed_num) + { + if (!(*waiting_it).memDepReady && + (*waiting_it).storeDep != storeDependents.end()) { + sd_it_t sd_it = (*waiting_it).storeDep; + + // Make sure the iterator that the store has pointing + // back is actually to this instruction. + assert((*sd_it).second.back() == waiting_it); + + // Now remove this from the store's list of dependent + // instructions. + (*sd_it).second.pop_back(); + } + + waitingInsts.erase(waiting_it--); + } + } + + if (!readyInsts.empty()) { + sn_it_t ready_it = readyInsts.end(); + + --ready_it; + + // Same for the ready list. 
+ while (!readyInsts.empty() && + (*ready_it) > squashed_num) + { + readyInsts.erase(ready_it--); + } + } + + if (!storeDependents.empty()) { + sd_it_t dep_it = storeDependents.end(); + + --dep_it; + + // Same for the dependencies list. + while (!storeDependents.empty() && + (*dep_it).first > squashed_num) + { + // This store's list of dependent instructions should be empty. + assert((*dep_it).second.empty()); + + storeDependents.erase(dep_it--); + } + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->readPC(), + store_inst->readPC()); + // Tell the memory dependence unit of the violation. + depPred.violation(violating_load->readPC(), store_inst->readPC()); +} + +template <class MemDepPred, class Impl> +inline void +MemDepUnit<MemDepPred, Impl>::moveToReady(dep_it_t &woken_inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i " + "to the ready list.\n", (*woken_inst).seqNum); + + // Add it to the ready list. + readyInsts.insert((*woken_inst).seqNum); + + // Remove it from the waiting instructions. 
+ waitingInsts.erase(woken_inst); +} diff --git a/cpu/beta_cpu/ras.cc b/cpu/beta_cpu/ras.cc new file mode 100644 index 000000000..ca05f5a0d --- /dev/null +++ b/cpu/beta_cpu/ras.cc @@ -0,0 +1,42 @@ +#include "cpu/beta_cpu/ras.hh" + +ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) + : numEntries(_numEntries), usedEntries(0), + tos(0) +{ + addrStack = new Addr[numEntries](0); +} + +void +ReturnAddrStack::push(const Addr &return_addr) +{ + incrTos(); + + addrStack[tos] = return_addr; + + if (usedEntries != numEntries) { + ++usedEntries; + } +} + +void +ReturnAddrStack::pop() +{ + // Not sure it's possible to really track usedEntries properly. +// assert(usedEntries > 0); + + if (usedEntries > 0) { + --usedEntries; + } + + decrTos(); +} + +void +ReturnAddrStack::restore(unsigned top_entry_idx, + const Addr &restored_target) +{ + tos = top_entry_idx; + + addrStack[tos] = restored_target; +} diff --git a/cpu/beta_cpu/ras.hh b/cpu/beta_cpu/ras.hh new file mode 100644 index 000000000..7666f825f --- /dev/null +++ b/cpu/beta_cpu/ras.hh @@ -0,0 +1,40 @@ +#ifndef __RAS_HH__ +#define __RAS_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +class ReturnAddrStack +{ + public: + ReturnAddrStack(unsigned numEntries); + + Addr top() + { return addrStack[tos]; } + + unsigned topIdx() + { return tos; } + + void push(const Addr &return_addr); + + void pop(); + + void restore(unsigned top_entry_idx, const Addr &restored_target); + + private: + inline void incrTos() + { tos = (tos + 1) % numEntries; } + + inline void decrTos() + { tos = (tos == 0 ? 
numEntries - 1 : tos - 1); } + + Addr *addrStack; + + unsigned numEntries; + + unsigned usedEntries; + + unsigned tos; +}; + +#endif // __RAS_HH__ diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh new file mode 100644 index 000000000..a81ed63bc --- /dev/null +++ b/cpu/beta_cpu/regfile.hh @@ -0,0 +1,599 @@ +#ifndef __REGFILE_HH__ +#define __REGFILE_HH__ + +// @todo: Destructor + +#include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/comm.hh" + +#include "base/trace.hh" + +// This really only depends on the ISA, and not the Impl. It might be nicer +// to see if I can make it depend on nothing... +// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, +// and should go in the AlphaFullCPU. + +template <class Impl> +class PhysRegFile +{ + //Note that most of the definitions of the IntReg, FloatReg, etc. exist + //within the Impl/ISA class and not within this PhysRegFile class. + + //Will need some way to allow stuff like swap_palshadow to access the + //correct registers. Might require code changes to swap_palshadow and + //other execution contexts. + + //Will make these registers public for now, but they probably should + //be private eventually with some accessor functions. + public: + typedef typename Impl::ISA ISA; + + PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs); + + //Everything below should be pretty well identical to the normal + //register file that exists within AlphaISA class. + //The duplication is unfortunate but it's better than having + //different ways to access certain registers. 
+ + //Add these in later when everything else is in place +// void serialize(std::ostream &os); +// void unserialize(Checkpoint *cp, const std::string §ion); + + uint64_t readIntReg(PhysRegIndex reg_idx) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to int register %i, has data " + "%i\n", int(reg_idx), intRegFile[reg_idx]); + return intRegFile[reg_idx]; + } + + float readFloatRegSingle(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as single, has " + "data %8.8f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); + + return (float)floatRegFile[reg_idx].d; + } + + double readFloatRegDouble(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as double, has " + " data %8.8f\n", int(reg_idx), floatRegFile[reg_idx].d); + + return floatRegFile[reg_idx].d; + } + + uint64_t readFloatRegInt(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as int, has data " + "%lli\n", int(reg_idx), floatRegFile[reg_idx].q); + + return floatRegFile[reg_idx].q; + } + + void setIntReg(PhysRegIndex reg_idx, uint64_t val) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", + int(reg_idx), val); + + intRegFile[reg_idx] = val; + } + + void setFloatRegSingle(PhysRegIndex reg_idx, float val) + { + // Remove the base Float reg dependency. 
+ reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", + int(reg_idx), val); + + floatRegFile[reg_idx].d = (double)val; + } + + void setFloatRegDouble(PhysRegIndex reg_idx, double val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", + int(reg_idx), val); + + floatRegFile[reg_idx].d = val; + } + + void setFloatRegInt(PhysRegIndex reg_idx, uint64_t val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), val); + + floatRegFile[reg_idx].q = val; + } + + uint64_t readPC() + { + return pc; + } + + void setPC(uint64_t val) + { + pc = val; + } + + void setNextPC(uint64_t val) + { + npc = val; + } + + //Consider leaving this stuff and below in some implementation specific + //file as opposed to the general register file. Or have a derived class. + uint64_t readUniq() + { + return miscRegs.uniq; + } + + void setUniq(uint64_t val) + { + miscRegs.uniq = val; + } + + uint64_t readFpcr() + { + return miscRegs.fpcr; + } + + void setFpcr(uint64_t val) + { + miscRegs.fpcr = val; + } + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + int readIntrFlag() { return intrflag; } + void setIntrFlag(int val) { intrflag = val; } +#endif + + // These should be private eventually, but will be public for now + // so that I can hack around the initregs issue. + public: + /** (signed) integer register file. */ + IntReg *intRegFile; + + /** Floating point register file. */ + FloatReg *floatRegFile; + + /** Miscellaneous register file. 
*/ + MiscRegFile miscRegs; + + Addr pc; // program counter + Addr npc; // next-cycle program counter + + private: + unsigned numPhysicalIntRegs; + unsigned numPhysicalFloatRegs; +}; + +template <class Impl> +PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs) + : numPhysicalIntRegs(_numPhysicalIntRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs) +{ + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; + + memset(intRegFile, 0, sizeof(*intRegFile)); + memset(floatRegFile, 0, sizeof(*floatRegFile)); +} + +#ifdef FULL_SYSTEM + +//Problem: This code doesn't make sense at the RegFile level because it +//needs things such as the itb and dtb. Either put it at the CPU level or +//the DynInst level. +template <class Impl> +uint64_t +PhysRegFile<Impl>::readIpr(int idx, Fault &fault) +{ + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case ISA::IPR_PALtemp0: + case ISA::IPR_PALtemp1: + case ISA::IPR_PALtemp2: + case ISA::IPR_PALtemp3: + case ISA::IPR_PALtemp4: + case ISA::IPR_PALtemp5: + case ISA::IPR_PALtemp6: + case ISA::IPR_PALtemp7: + case ISA::IPR_PALtemp8: + case ISA::IPR_PALtemp9: + case ISA::IPR_PALtemp10: + case ISA::IPR_PALtemp11: + case ISA::IPR_PALtemp12: + case ISA::IPR_PALtemp13: + case ISA::IPR_PALtemp14: + case ISA::IPR_PALtemp15: + case ISA::IPR_PALtemp16: + case ISA::IPR_PALtemp17: + case ISA::IPR_PALtemp18: + case ISA::IPR_PALtemp19: + case ISA::IPR_PALtemp20: + case ISA::IPR_PALtemp21: + case ISA::IPR_PALtemp22: + case ISA::IPR_PALtemp23: + case ISA::IPR_PAL_BASE: + + case ISA::IPR_IVPTBR: + case ISA::IPR_DC_MODE: + case ISA::IPR_MAF_MODE: + case ISA::IPR_ISR: + case ISA::IPR_EXC_ADDR: + case ISA::IPR_IC_PERR_STAT: + case ISA::IPR_DC_PERR_STAT: + case ISA::IPR_MCSR: + case ISA::IPR_ASTRR: + case ISA::IPR_ASTER: + case ISA::IPR_SIRR: + case ISA::IPR_ICSR: + case ISA::IPR_ICM: + case ISA::IPR_DTB_CM: + case ISA::IPR_IPLR: + case ISA::IPR_INTID: + 
case ISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case ISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case ISA::IPR_VA: + // SFX: unlocks interrupt status registers + retval = ipr[idx]; + + if (!misspeculating()) + regs.intrlock = false; + break; + + case ISA::IPR_VA_FORM: + case ISA::IPR_MM_STAT: + case ISA::IPR_IFAULT_VA_FORM: + case ISA::IPR_EXC_MASK: + case ISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case ISA::IPR_DTB_PTE: + { + ISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case ISA::IPR_HWINT_CLR: + case ISA::IPR_SL_XMIT: + case ISA::IPR_DC_FLUSH: + case ISA::IPR_IC_FLUSH: + case ISA::IPR_ALT_MODE: + case ISA::IPR_DTB_IA: + case ISA::IPR_DTB_IAP: + case ISA::IPR_ITB_IA: + case ISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +#ifdef DEBUG +// Cause the simulator to break when changing to the following IPL +int break_ipl = -1; +#endif + +template <class Impl> +Fault +PhysRegFile<Impl>::setIpr(int idx, uint64_t val) +{ + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case ISA::IPR_PALtemp0: + case ISA::IPR_PALtemp1: + case ISA::IPR_PALtemp2: + case ISA::IPR_PALtemp3: + case ISA::IPR_PALtemp4: + case ISA::IPR_PALtemp5: + case ISA::IPR_PALtemp6: + case ISA::IPR_PALtemp7: + case ISA::IPR_PALtemp8: + case ISA::IPR_PALtemp9: + case ISA::IPR_PALtemp10: + case ISA::IPR_PALtemp11: + case ISA::IPR_PALtemp12: + case 
ISA::IPR_PALtemp13: + case ISA::IPR_PALtemp14: + case ISA::IPR_PALtemp15: + case ISA::IPR_PALtemp16: + case ISA::IPR_PALtemp17: + case ISA::IPR_PALtemp18: + case ISA::IPR_PALtemp19: + case ISA::IPR_PALtemp20: + case ISA::IPR_PALtemp21: + case ISA::IPR_PALtemp22: + case ISA::IPR_PAL_BASE: + case ISA::IPR_IC_PERR_STAT: + case ISA::IPR_DC_PERR_STAT: + case ISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case ISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case ISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). + ipr[idx] = val; + break; + + case ISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case ISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case ISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case ISA::IPR_ASTRR: + case ISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case ISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case ISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + + case ISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case ISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case ISA::IPR_MCSR: + // more here after optimization... 
+ ipr[idx] = val; + break; + + case ISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case ISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case ISA::IPR_IVPTBR: + case ISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case ISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case ISA::IPR_DC_MODE: + case ISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case ISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case ISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case ISA::IPR_EXC_SUM: + case ISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case ISA::IPR_INTID: + case ISA::IPR_SL_RCV: + case ISA::IPR_MM_STAT: + case ISA::IPR_ITB_PTE_TEMP: + case ISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case ISA::IPR_HWINT_CLR: + case ISA::IPR_SL_XMIT: + case ISA::IPR_DC_FLUSH: + case ISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case ISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case ISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case ISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN])); + break; + + case ISA::IPR_DTB_TAG: { + struct ISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case ISA::IPR_ITB_PTE: { + struct ISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + itb->insert(ipr[ISA::IPR_ITB_TAG], pte); + } + break; + + case ISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case ISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case ISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... 
+ return No_Fault; +} + +#endif // #ifdef FULL_SYSTEM + +#endif // __REGFILE_HH__ diff --git a/cpu/beta_cpu/rename.cc b/cpu/beta_cpu/rename.cc new file mode 100644 index 000000000..1feec4342 --- /dev/null +++ b/cpu/beta_cpu/rename.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/rename_impl.hh" + +template class SimpleRename<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh new file mode 100644 index 000000000..3e9899718 --- /dev/null +++ b/cpu/beta_cpu/rename.hh @@ -0,0 +1,206 @@ +// Todo: +// Fix up trap and barrier handling. +// May want to have different statuses to differentiate the different stall +// conditions. + +#ifndef __CPU_BETA_CPU_SIMPLE_RENAME_HH__ +#define __CPU_BETA_CPU_SIMPLE_RENAME_HH__ + +#include <list> + +#include "base/statistics.hh" +#include "base/timebuf.hh" + +// Will need rename maps for both the int reg file and fp reg file. +// Or change rename map class to handle both. (RegFile handles both.) +template<class Impl> +class SimpleRename +{ + public: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + // Typedefs from the CPUPol + typedef typename CPUPol::FreeList FreeList; + typedef typename CPUPol::RenameMap RenameMap; + + // Typedefs from the ISA. + typedef typename ISA::Addr Addr; + + public: + // Rename will block if ROB becomes full or issue queue becomes full, + // or there are no free registers to rename to. + // Only case where rename squashes is if IEW squashes. 
+ enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking, + BarrierStall + }; + + private: + Status _status; + + public: + SimpleRename(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void setFreeList(FreeList *fl_ptr); + + void dumpHistory(); + + void tick(); + + void rename(); + + void squash(); + + private: + void block(); + + inline void unblock(); + + void doSquash(); + + void removeFromHistory(InstSeqNum inst_seq_num); + + inline void renameSrcRegs(DynInstPtr &inst); + + inline void renameDestRegs(DynInstPtr &inst); + + inline int calcFreeROBEntries(); + + inline int calcFreeIQEntries(); + + /** Holds the previous information for each rename. + * Note that often times the inst may have been deleted, so only access + * the pointer for the address and do not dereference it. + */ + struct RenameHistory { + RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg, + PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg) + : instSeqNum(_instSeqNum), archReg(_archReg), + newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg), + placeHolder(false) + { + } + + /** Constructor used specifically for cases where a place holder + * rename history entry is being made. + */ + RenameHistory(InstSeqNum _instSeqNum) + : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0), + prevPhysReg(0), placeHolder(true) + { + } + + InstSeqNum instSeqNum; + RegIndex archReg; + PhysRegIndex newPhysReg; + PhysRegIndex prevPhysReg; + bool placeHolder; + }; + + std::list<RenameHistory> historyBuffer; + + /** CPU interface. */ + FullCPU *cpu; + + // Interfaces to objects outside of rename. + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get IEW's output from backwards time buffer. 
*/ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write infromation heading to previous stages. */ + // Might not be the best name as not only decode will read it. + typename TimeBuffer<TimeStruct>::wire toDecode; + + /** Rename instruction queue. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to write any information heading to IEW. */ + typename TimeBuffer<RenameStruct>::wire toIEW; + + /** Decode instruction queue interface. */ + TimeBuffer<DecodeStruct> *decodeQueue; + + /** Wire to get decode's output from decode queue. */ + typename TimeBuffer<DecodeStruct>::wire fromDecode; + + /** Skid buffer between rename and decode. */ + std::queue<DecodeStruct> skidBuffer; + + /** Rename map interface. */ + SimpleRenameMap *renameMap; + + /** Free list interface. */ + FreeList *freeList; + + /** Delay between iew and rename, in ticks. */ + int iewToRenameDelay; + + /** Delay between decode and rename, in ticks. */ + int decodeToRenameDelay; + + /** Delay between commit and rename, in ticks. */ + unsigned commitToRenameDelay; + + /** Rename width, in instructions. */ + unsigned renameWidth; + + /** Commit width, in instructions. Used so rename knows how many + * instructions might have freed registers in the previous cycle. + */ + unsigned commitWidth; + + /** The instruction that rename is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. 
+ */ + unsigned numInst; + + Stats::Scalar<> renameSquashCycles; + Stats::Scalar<> renameIdleCycles; + Stats::Scalar<> renameBlockCycles; + Stats::Scalar<> renameUnblockCycles; + Stats::Scalar<> renameRenamedInsts; + Stats::Scalar<> renameSquashedInsts; + Stats::Scalar<> renameROBFullEvents; + Stats::Scalar<> renameIQFullEvents; + Stats::Scalar<> renameFullRegistersEvents; + Stats::Scalar<> renameRenamedOperands; + Stats::Scalar<> renameRenameLookups; + Stats::Scalar<> renameHBPlaceHolders; + Stats::Scalar<> renameCommittedMaps; + Stats::Scalar<> renameUndoneMaps; + Stats::Scalar<> renameValidUndoneMaps; +}; + +#endif // __CPU_BETA_CPU_SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh new file mode 100644 index 000000000..5a8e499e9 --- /dev/null +++ b/cpu/beta_cpu/rename_impl.hh @@ -0,0 +1,743 @@ +#include <list> + +#include "cpu/beta_cpu/rename.hh" + +template <class Impl> +SimpleRename<Impl>::SimpleRename(Params ¶ms) + : iewToRenameDelay(params.iewToRenameDelay), + decodeToRenameDelay(params.decodeToRenameDelay), + commitToRenameDelay(params.commitToRenameDelay), + renameWidth(params.renameWidth), + commitWidth(params.commitWidth), + numInst(0) +{ + _status = Idle; +} + +template <class Impl> +void +SimpleRename<Impl>::regStats() +{ + renameSquashCycles + .name(name() + ".renameSquashCycles") + .desc("Number of cycles rename is squashing") + .prereq(renameSquashCycles); + renameIdleCycles + .name(name() + ".renameIdleCycles") + .desc("Number of cycles rename is idle") + .prereq(renameIdleCycles); + renameBlockCycles + .name(name() + ".renameBlockCycles") + .desc("Number of cycles rename is blocking") + .prereq(renameBlockCycles); + renameUnblockCycles + .name(name() + ".renameUnblockCycles") + .desc("Number of cycles rename is unblocking") + .prereq(renameUnblockCycles); + renameRenamedInsts + .name(name() + ".renameRenamedInsts") + .desc("Number of instructions processed by rename") + .prereq(renameRenamedInsts); + 
renameSquashedInsts + .name(name() + ".renameSquashedInsts") + .desc("Number of squashed instructions processed by rename") + .prereq(renameSquashedInsts); + renameROBFullEvents + .name(name() + ".renameROBFullEvents") + .desc("Number of times rename has considered the ROB 'full'") + .prereq(renameROBFullEvents); + renameIQFullEvents + .name(name() + ".renameIQFullEvents") + .desc("Number of times rename has considered the IQ 'full'") + .prereq(renameIQFullEvents); + renameFullRegistersEvents + .name(name() + ".renameFullRegisterEvents") + .desc("Number of times there has been no free registers") + .prereq(renameFullRegistersEvents); + renameRenamedOperands + .name(name() + ".renameRenamedOperands") + .desc("Number of destination operands rename has renamed") + .prereq(renameRenamedOperands); + renameRenameLookups + .name(name() + ".renameRenameLookups") + .desc("Number of register rename lookups that rename has made") + .prereq(renameRenameLookups); + renameHBPlaceHolders + .name(name() + ".renameHBPlaceHolders") + .desc("Number of place holders added to the history buffer") + .prereq(renameHBPlaceHolders); + renameCommittedMaps + .name(name() + ".renameCommittedMaps") + .desc("Number of HB maps that are committed") + .prereq(renameCommittedMaps); + renameUndoneMaps + .name(name() + ".renameUndoneMaps") + .desc("Number of HB maps that are undone due to squashing") + .prereq(renameUndoneMaps); + renameValidUndoneMaps + .name(name() + ".renameValidUndoneMaps") + .desc("Number of HB maps that are undone, and are not place holders") + .prereq(renameValidUndoneMaps); +} + +template <class Impl> +void +SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Rename, "Rename: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Rename, "Rename: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from IEW 
stage. + fromIEW = timeBuffer->getWire(-iewToRenameDelay); + + // Setup wire to read infromation from time buffer, from commit stage. + fromCommit = timeBuffer->getWire(-commitToRenameDelay); + + // Setup wire to write information to previous stages. + toDecode = timeBuffer->getWire(0); +} + +template <class Impl> +void +SimpleRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to write information to future stages. + toIEW = renameQueue->getWire(0); +} + +template <class Impl> +void +SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr) +{ + DPRINTF(Rename, "Rename: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to get information from decode. + fromDecode = decodeQueue->getWire(-decodeToRenameDelay); +} + +template <class Impl> +void +SimpleRename<Impl>::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::setFreeList(FreeList *fl_ptr) +{ + DPRINTF(Rename, "Rename: Setting free list pointer.\n"); + freeList = fl_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::dumpHistory() +{ + typename list<RenameHistory>::iterator buf_it = historyBuffer.begin(); + + while (buf_it != historyBuffer.end()) + { + cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " + "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, + (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); + + buf_it++; + } +} + +template <class Impl> +void +SimpleRename<Impl>::block() +{ + DPRINTF(Rename, "Rename: Blocking.\n"); + // Set status to Blocked. + _status = Blocked; + + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. 
+ skidBuffer.push(*fromDecode); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template <class Impl> +inline void +SimpleRename<Impl>::unblock() +{ + DPRINTF(Rename, "Rename: Read instructions out of skid buffer this " + "cycle.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toDecode->renameInfo.stall = true; + } else { + DPRINTF(Rename, "Rename: Done unblocking.\n"); + _status = Running; + } +} + +template <class Impl> +void +SimpleRename<Impl>::doSquash() +{ + typename list<RenameHistory>::iterator hb_it = historyBuffer.begin(); + + InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; + +#ifdef FULL_SYSTEM + assert(!historyBuffer.empty()); +#else + // After a syscall squashes everything, the history buffer may be empty + // but the ROB may still be squashing instructions. + if (historyBuffer.empty()) { + return; + } +#endif // FULL_SYSTEM + + // Go through the most recent instructions, undoing the mappings + // they did and freeing up the registers. + while ((*hb_it).instSeqNum > squashed_seq_num) + { + assert(hb_it != historyBuffer.end()); + + DPRINTF(Rename, "Rename: Removing history entry with sequence " + "number %i.\n", (*hb_it).instSeqNum); + + // If it's not simply a place holder, then add the registers. + if (!(*hb_it).placeHolder) { + // Tell the rename map to set the architected register to the + // previous physical register that it was renamed to. + renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg); + + // Put the renamed physical register back on the free list. 
+ freeList->addReg(hb_it->newPhysReg); + + ++renameValidUndoneMaps; + } + + historyBuffer.erase(hb_it++); + + ++renameUndoneMaps; + } +} + +template <class Impl> +void +SimpleRename<Impl>::squash() +{ + DPRINTF(Rename, "Rename: Squashing instructions.\n"); + // Set the status to Squashing. + _status = Squashing; + + numInst = 0; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } + + doSquash(); +} + +template<class Impl> +void +SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num) +{ + DPRINTF(Rename, "Rename: Removing a committed instruction from the " + "history buffer, until sequence number %lli.\n", inst_seq_num); + typename list<RenameHistory>::iterator hb_it = historyBuffer.end(); + + --hb_it; + + if (hb_it->instSeqNum > inst_seq_num) { + DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure " + "that a syscall happened recently.\n"); + return; + } + + while ((*hb_it).instSeqNum != inst_seq_num) + { + // Make sure we haven't gone off the end of the list. + assert(hb_it != historyBuffer.end()); + + // In theory instructions at the end of the history buffer + // should be older than the instruction being removed, which + // means they will have a lower sequence number. Also the + // instruction being removed from the history really should + // be the last instruction in the list, as it is the instruction + // that was just committed that is being removed. + assert(hb_it->instSeqNum < inst_seq_num); + DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence" + " number %i.\n", + (*hb_it).prevPhysReg, (*hb_it).instSeqNum); + + if (!(*hb_it).placeHolder) { + freeList->addReg((*hb_it).prevPhysReg); + ++renameCommittedMaps; + } + + historyBuffer.erase(hb_it--); + } + + // Finally free up the previous register of the finished instruction + // itself. 
+ if (!(*hb_it).placeHolder) { + freeList->addReg(hb_it->prevPhysReg); + ++renameCommittedMaps; + } + + historyBuffer.erase(hb_it); +} + +template <class Impl> +inline void +SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst) +{ + unsigned num_src_regs = inst->numSrcRegs(); + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. + PhysRegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + + ++renameRenameLookups; + } +} + +template <class Impl> +inline void +SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst) +{ + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_dest_regs = inst->numDestRegs(); + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. 
+ if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + + ++renameHBPlaceHolders; + } else { + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "reg %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", + (*historyBuffer.begin()).instSeqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + + ++renameRenamedOperands; + } + } +} + +template <class Impl> +inline int +SimpleRename<Impl>::calcFreeROBEntries() +{ + return fromCommit->commitInfo.freeROBEntries - + renameWidth * iewToRenameDelay; +} + +template <class Impl> +inline int +SimpleRename<Impl>::calcFreeIQEntries() +{ + return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay; +} + +template<class Impl> +void +SimpleRename<Impl>::tick() +{ + // Rename will need to try to rename as many instructions as it + // has bandwidth, unless it is blocked. + + // Check if _status is BarrierStall. 
If so, then check if the number + // of free ROB entries is equal to the number of total ROB entries. + // Once equal then wake this stage up. Set status to unblocking maybe. + + if (_status != Blocked && _status != Squashing) { + DPRINTF(Rename, "Rename: Status is not blocked, will attempt to " + "run stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + rename(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + ++renameUnblockCycles; + + if (fromDecode->size > 0) { + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + } + + unblock(); + } + } else if (_status == Blocked) { + ++renameBlockCycles; + + // If stage is blocked and still receiving valid instructions, + // make sure to store them in the skid buffer. + if (fromDecode->size > 0) { + + block(); + + // Continue to tell previous stage to stall. + toDecode->renameInfo.stall = true; + } + + if (!fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall && + calcFreeROBEntries() > 0 && + calcFreeIQEntries() > 0 && + renameMap->numFreeEntries() > 0) { + + // Need to be sure to check all blocking conditions above. + // If they have cleared, then start unblocking. + DPRINTF(Rename, "Rename: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this stage + // is done unblocking. + toDecode->renameInfo.stall = true; + } else { + // Otherwise no conditions have changed. Tell previous + // stage to continue blocking. 
+ toDecode->renameInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + } else if (_status == Squashing) { + ++renameSquashCycles; + + if (fromCommit->commitInfo.squash) { + squash(); + } else if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); + _status = Running; + } else { + doSquash(); + } + } + + // Ugly code, revamp all of the tick() functions eventually. + if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { +#ifndef FULL_SYSTEM + if (!fromCommit->commitInfo.squash) { + removeFromHistory(fromCommit->commitInfo.doneSeqNum); + } +#else + removeFromHistory(fromCommit->commitInfo.doneSeqNum); +#endif + } + + // Perhaps put this outside of this function, since this will + // happen regardless of whether or not the stage is blocked or + // squashing. + // Read from the time buffer any necessary data. + // Read registers that are freed, and add them to the freelist. + // This is unnecessary due to the history buffer (assuming the history + // buffer works properly). +/* + while(!fromCommit->commitInfo.freeRegs.empty()) + { + PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back(); + DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n", + (int)freed_reg); + freeList->addReg(freed_reg); + + fromCommit->commitInfo.freeRegs.pop_back(); + } +*/ + +} + +template<class Impl> +void +SimpleRename<Impl>::rename() +{ + // Check if any of the stages ahead of rename are telling rename + // to squash. The squash() function will also take care of fixing up + // the rename map and the free list. + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n"); + squash(); + return; + } + + // Check if time buffer is telling this stage to stall. 
+ if (fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to " + "stall.\n"); + block(); + return; + } + + // Check if the current status is squashing. If so, set its status + // to running and resume execution the next cycle. + if (_status == Squashing) { + DPRINTF(Rename, "Rename: Done squashing.\n"); + _status = Running; + return; + } + + // Check the decode queue to see if instructions are available. + // If there are no available instructions to rename, then do nothing. + // Or, if the stage is currently unblocking, then go ahead and run it. + if (fromDecode->size == 0 && _status != Unblocking) { + DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); + // Should I change status to idle? + return; + } + + //////////////////////////////////// + // Actual rename part. + //////////////////////////////////// + + DynInstPtr inst; + + // If we're unblocking, then we may be in the middle of an instruction + // group. Subtract off numInst to get the proper number of instructions + // left. + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromDecode->size; + + bool block_this_cycle = false; + + // Will have to do a different calculation for the number of free + // entries. Number of free entries recorded on this cycle - + // renameWidth * renameToDecodeDelay + int free_rob_entries = calcFreeROBEntries(); + int free_iq_entries = calcFreeIQEntries(); + int min_iq_rob = min(free_rob_entries, free_iq_entries); + + unsigned to_iew_index = 0; + + // Check if there's any space left. + if (min_iq_rob <= 0) { + DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " + "entries.\n" + "Rename: ROB has %d free entries.\n" + "Rename: IQ has %d free entries.\n", + free_rob_entries, + free_iq_entries); + block(); + // Tell previous stage to stall. 
+ toDecode->renameInfo.stall = true; + + if (free_rob_entries <= 0) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + + return; + } else if (min_iq_rob < insts_available) { + DPRINTF(Rename, "Rename: Will have to block this cycle. Only " + "%i insts can be renamed due to IQ/ROB limits.\n", + min_iq_rob); + + insts_available = min_iq_rob; + + block_this_cycle = true; + + if (free_rob_entries < free_iq_entries) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + } + + while (insts_available > 0) { + DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); + + // Get the next instruction either from the skid buffer or the + // decode queue. + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromDecode->insts[numInst]; + + if (inst->isSquashed()) { + DPRINTF(Rename, "Rename: instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + // Go to the next instruction. + ++numInst; + + ++renameSquashedInsts; + + // Decrement how many instructions are available. + --insts_available; + + continue; + } + + DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", + inst->seqNum, inst->readPC()); + + // If it's a trap instruction, then it needs to wait here within + // rename until the ROB is empty. Needs a way to detect that the + // ROB is empty. Maybe an event? + // Would be nice if it could be avoided putting this into a + // specific stage and instead just put it into the AlphaFullCPU. + // Might not really be feasible though... + // (EXCB, TRAPB) + if (inst->isSerializing()) { + panic("Rename: Serializing instruction encountered.\n"); + DPRINTF(Rename, "Rename: Serializing instruction " + "encountered.\n"); + + // Change status over to BarrierStall so that other stages know + // what this is blocked on. + _status = BarrierStall; + + block_this_cycle = true; + + break; + } + + // Check here to make sure there are enough destination registers + // to rename to. 
Otherwise block. + if (renameMap->numFreeEntries() < inst->numDestRegs()) + { + DPRINTF(Rename, "Rename: Blocking due to lack of free " + "physical registers to rename to.\n"); + // Need some sort of event based on a register being freed. + + block_this_cycle = true; + + ++renameFullRegistersEvents; + + break; + } + + renameSrcRegs(inst); + + renameDestRegs(inst); + + // Put instruction in rename queue. + toIEW->insts[to_iew_index] = inst; + ++(toIEW->size); + + // Decrease the number of free ROB and IQ entries. + --free_rob_entries; + --free_iq_entries; + + // Increment which instruction we're on. + ++to_iew_index; + ++numInst; + + ++renameRenamedInsts; + + // Decrement how many instructions are available. + --insts_available; + } + + // Check if there's any instructions left that haven't yet been renamed. + // If so then block. + if (block_this_cycle) { + block(); + + toDecode->renameInfo.stall = true; + } else { + // If we had a successful rename and didn't have to exit early, then + // reset numInst so it will refer to the correct instruction on next + // run. + numInst = 0; + } +} diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc new file mode 100644 index 000000000..1301202f2 --- /dev/null +++ b/cpu/beta_cpu/rename_map.cc @@ -0,0 +1,315 @@ + +#include "cpu/beta_cpu/rename_map.hh" + +// Todo: Consider making functions inline. Avoid having things that are +// using the zero register or misc registers from adding on the registers +// to the free list. Possibly remove the direct communication between +// this and the freelist. Considering making inline bool functions that +// determine if the register is a logical int, logical fp, physical int, +// physical fp, etc. 
+ +SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + intZeroReg(_intZeroReg), + floatZeroReg(_floatZeroReg) +{ + DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: " + "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs, + numLogicalFloatRegs, numPhysicalFloatRegs); + + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Create the rename maps, and their scoreboards. + intRenameMap = new RenameEntry[numLogicalIntRegs]; + floatRenameMap = new RenameEntry[numLogicalRegs]; + + // Should combine this into one scoreboard. + intScoreboard.resize(numPhysicalIntRegs); + floatScoreboard.resize(numPhysicalRegs); + miscScoreboard.resize(numPhysicalRegs + numMiscRegs); + + // Initialize the entries in the integer rename map to point to the + // physical registers of the same index, and consider each register + // ready until the first rename occurs. + for (RegIndex index = 0; index < numLogicalIntRegs; ++index) + { + intRenameMap[index].physical_reg = index; + intScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalIntRegs; + index < numPhysicalIntRegs; + ++index) + { + intScoreboard[index] = 0; + } + + int float_reg_idx = numPhysicalIntRegs; + + // Initialize the entries in the floating point rename map to point to + // the physical registers of the same index, and consider each register + // ready until the first rename occurs. 
+ // Although the index refers purely to architected registers, because + // the floating reg indices come after the integer reg indices, they + // may exceed the size of a normal RegIndex (short). + for (PhysRegIndex index = numLogicalIntRegs; + index < numLogicalRegs; ++index) + { + floatRenameMap[index].physical_reg = float_reg_idx++; + } + + for (PhysRegIndex index = numPhysicalIntRegs; + index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) + { + floatScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs; + index < numPhysicalRegs; + ++index) + { + floatScoreboard[index] = 0; + } + + // Initialize the entries in the misc register scoreboard to be ready. + for (PhysRegIndex index = numPhysicalRegs; + index < numPhysicalRegs + numMiscRegs; ++index) + { + miscScoreboard[index] = 1; + } +} + +SimpleRenameMap::~SimpleRenameMap() +{ + // Delete the rename maps as they were allocated with new. + delete [] intRenameMap; + delete [] floatRenameMap; +} + +void +SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) +{ + //Setup the interface to the freelist. + freeList = fl_ptr; +} + + +// Don't allow this stage to fault; force that check to the rename stage. +// Simply ask to rename a logical register and get back a new physical +// register index. +SimpleRenameMap::RenameInfo +SimpleRenameMap::rename(RegIndex arch_reg) +{ + PhysRegIndex renamed_reg; + PhysRegIndex prev_reg; + + if (arch_reg < numLogicalIntRegs) { + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = intRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != intZeroReg) { + // Get a free physical register to rename to. 
+ renamed_reg = freeList->getIntReg(); + + // Update the integer rename map. + intRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); + + // Mark register as not ready. + intScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = intZeroReg; + } + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base offset for floating point registers. +// arch_reg = arch_reg - numLogicalIntRegs; + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = floatRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != floatZeroReg) { + // Get a free floating point register to rename to. + renamed_reg = freeList->getFloatReg(); + + // Update the floating point rename map. + floatRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs && + renamed_reg >= numPhysicalIntRegs); + + // Mark register as not ready. + floatScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = floatZeroReg; + } + } else { + // Subtract off the base offset for miscellaneous registers. + arch_reg = arch_reg - numLogicalRegs; + + // No renaming happens to the misc. registers. They are simply the + // registers that come after all the physical registers; thus + // take the base architected register and add the physical registers + // to it. + renamed_reg = arch_reg + numPhysicalRegs; + + // Set the previous register to the same register; mainly it must be + // known that the prev reg was outside the range of normal registers + // so the free list can avoid adding it. 
+ prev_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs + numMiscRegs); + + miscScoreboard[renamed_reg] = false; + } + + return RenameInfo(renamed_reg, prev_reg); +} + +//Perhaps give this a pair as a return value, of the physical register +//and whether or not it's ready. +PhysRegIndex +SimpleRenameMap::lookup(RegIndex arch_reg) +{ + if (arch_reg < numLogicalIntRegs) { + return intRenameMap[arch_reg].physical_reg; + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base FP offset. +// arch_reg = arch_reg - numLogicalIntRegs; + + return floatRenameMap[arch_reg].physical_reg; + } else { + // Subtract off the misc registers offset. + arch_reg = arch_reg - numLogicalRegs; + + // Misc. regs don't rename, so simply add the base arch reg to + // the number of physical registers. + return numPhysicalRegs + arch_reg; + } +} + +bool +SimpleRenameMap::isReady(PhysRegIndex phys_reg) +{ + if (phys_reg < numPhysicalIntRegs) { + return intScoreboard[phys_reg]; + } else if (phys_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. +// phys_reg = phys_reg - numPhysicalIntRegs; + + return floatScoreboard[phys_reg]; + } else { + // Subtract off the misc registers offset. +// phys_reg = phys_reg - numPhysicalRegs; + + return miscScoreboard[phys_reg]; + } +} + +// In this implementation the miscellaneous registers do not actually rename, +// so this function does not allow you to try to change their mappings. 
+void +SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) +{ + if (arch_reg < numLogicalIntRegs) { + DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + intRenameMap[arch_reg].physical_reg = renamed_reg; + } else { + assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); + + DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", + (int)arch_reg - numLogicalIntRegs, renamed_reg); + + floatRenameMap[arch_reg].physical_reg = renamed_reg; + } +} + +void +SimpleRenameMap::squash(vector<RegIndex> freed_regs, + vector<UnmapInfo> unmaps) +{ + panic("Not sure this function should be called."); + + // Not sure the rename map should be able to access the free list + // like this. + while (!freed_regs.empty()) { + RegIndex free_register = freed_regs.back(); + + if (free_register < numPhysicalIntRegs) { + freeList->addIntReg(free_register); + } else { + // Subtract off the base FP dependence tag. + free_register = free_register - numPhysicalIntRegs; + freeList->addFloatReg(free_register); + } + + freed_regs.pop_back(); + } + + // Take unmap info and roll back the rename map. +} + +void +SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) +{ + DPRINTF(Rename, "Rename map: Marking register %i as ready.\n", + (int)ready_reg); + + if (ready_reg < numPhysicalIntRegs) { + assert(ready_reg >= 0); + + intScoreboard[ready_reg] = 1; + } else if (ready_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. +// ready_reg = ready_reg - numPhysicalIntRegs; + + floatScoreboard[ready_reg] = 1; + } else { + //Subtract off the misc registers offset. 
+// ready_reg = ready_reg - numPhysicalRegs; + + miscScoreboard[ready_reg] = 1; + } +} + +int +SimpleRenameMap::numFreeEntries() +{ + int free_int_regs = freeList->numFreeIntRegs(); + int free_float_regs = freeList->numFreeFloatRegs(); + + if (free_int_regs < free_float_regs) { + return free_int_regs; + } else { + return free_float_regs; + } +} diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh new file mode 100644 index 000000000..44a7eefb1 --- /dev/null +++ b/cpu/beta_cpu/rename_map.hh @@ -0,0 +1,142 @@ +// Todo: Create destructor. +// Have it so that there's a more meaningful name given to the variable +// that marks the beginning of the FP registers. + +#ifndef __CPU_BETA_CPU_RENAME_MAP_HH__ +#define __CPU_BETA_CPU_RENAME_MAP_HH__ + +#include <iostream> +#include <utility> +#include <vector> + +#include "cpu/beta_cpu/free_list.hh" + +class SimpleRenameMap +{ + public: + /** + * Pair of a logical register and a physical register. Tells the + * previous mapping of a logical register to a physical register. + * Used to roll back the rename map to a previous state. + */ + typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo; + + /** + * Pair of a physical register and a physical register. Used to + * return the physical register that a logical register has been + * renamed to, and the previous physical register that the same + * logical register was previously mapped to. + */ + typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo; + + public: + //Constructor + SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg); + + /** Destructor. */ + ~SimpleRenameMap(); + + void setFreeList(SimpleFreeList *fl_ptr); + + //Tell rename map to get a free physical register for a given + //architected register. 
Not sure it should have a return value, + //but perhaps it should have some sort of fault in case there are + //no free registers. + RenameInfo rename(RegIndex arch_reg); + + PhysRegIndex lookup(RegIndex phys_reg); + + bool isReady(PhysRegIndex arch_reg); + + /** + * Marks the given register as ready, meaning that its value has been + * calculated and written to the register file. + * @params ready_reg The index of the physical register that is now + * ready. + */ + void markAsReady(PhysRegIndex ready_reg); + + void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); + + void squash(vector<RegIndex> freed_regs, + vector<UnmapInfo> unmaps); + + int numFreeEntries(); + + private: + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The integer zero register. This implementation assumes it is always + * zero and never can be anything else. + */ + RegIndex intZeroReg; + + /** The floating point zero register. This implementation assumes it is + * always zero and never can be anything else. + */ + RegIndex floatZeroReg; + + class RenameEntry + { + public: + PhysRegIndex physical_reg; + bool valid; + + RenameEntry() + : physical_reg(0), valid(false) + { } + }; + + /** Integer rename map. */ + RenameEntry *intRenameMap; + + /** Floating point rename map. */ + RenameEntry *floatRenameMap; + + /** Free list interface. */ + SimpleFreeList *freeList; + + // Might want to make all these scoreboards into one large scoreboard. 
+ + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + std::vector<bool> intScoreboard; + + /** Scoreboard of physical floating registers, saying whether or not they + * are ready. + */ + std::vector<bool> floatScoreboard; + + /** Scoreboard of miscellaneous registers, saying whether or not they + * are ready. + */ + std::vector<bool> miscScoreboard; +}; + +#endif //__CPU_BETA_CPU_RENAME_MAP_HH__ diff --git a/cpu/beta_cpu/rob.cc b/cpu/beta_cpu/rob.cc new file mode 100644 index 000000000..611cca0ba --- /dev/null +++ b/cpu/beta_cpu/rob.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/rob_impl.hh" + +// Force instantiation of InstructionQueue. +template ROB<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh new file mode 100644 index 000000000..da6b5232a --- /dev/null +++ b/cpu/beta_cpu/rob.hh @@ -0,0 +1,136 @@ +// Todo: Probably add in support for scheduling events (more than one as +// well) on the case of the ROB being empty or full. Considering tracking +// free entries instead of insts in ROB. Differentiate between squashing +// all instructions after the instruction, and all instructions after *and* +// including that instruction. + +#ifndef __CPU_BETA_CPU_ROB_HH__ +#define __CPU_BETA_CPU_ROB_HH__ + +#include <utility> +#include <vector> + +//#include "arch/alpha/isa_traits.hh" + +/** + * ROB class. Uses the instruction list that exists within the CPU to + * represent the ROB. This class doesn't contain that list, but instead + * a pointer to the CPU to get access to the list. The ROB, in this first + * implementation, is largely what drives squashing. + */ +template <class Impl> +class ROB +{ + public: + //Typedefs from the Impl. 
+ typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + + typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo_t; + typedef typename list<DynInstPtr>::iterator InstIt_t; + + public: + /** ROB constructor. + * @params _numEntries Number of entries in ROB. + * @params _squashWidth Number of instructions that can be squashed in a + * single cycle. + */ + ROB(unsigned _numEntries, unsigned _squashWidth); + + /** Function to set the CPU pointer, necessary due to which object the ROB + * is created within. + * @params cpu_ptr Pointer to the implementation specific full CPU object. + */ + void setCPU(FullCPU *cpu_ptr); + + /** Function to insert an instruction into the ROB. The parameter inst is + * not truly required, but is useful for checking correctness. Note + * that whatever calls this function must ensure that there is enough + * space within the ROB for the new instruction. + * @params inst The instruction being inserted into the ROB. + * @todo Remove the parameter once correctness is ensured. + */ + void insertInst(DynInstPtr &inst); + + /** Returns pointer to the head instruction within the ROB. There is + * no guarantee as to the return value if the ROB is empty. + * @retval Pointer to the DynInst that is at the head of the ROB. + */ + DynInstPtr readHeadInst() { return cpu->instList.front(); } + + DynInstPtr readTailInst() { return (*tail); } + + void retireHead(); + + bool isHeadReady(); + + unsigned numFreeEntries(); + + bool isFull() + { return numInstsInROB == numEntries; } + + bool isEmpty() + { return numInstsInROB == 0; } + + void doSquash(); + + void squash(InstSeqNum squash_num); + + uint64_t readHeadPC(); + + uint64_t readHeadNextPC(); + + InstSeqNum readHeadSeqNum(); + + uint64_t readTailPC(); + + InstSeqNum readTailSeqNum(); + + /** Checks if the ROB is still in the process of squashing instructions. + * @retval Whether or not the ROB is done squashing. 
+ */ + bool isDoneSquashing() const { return doneSquashing; } + + /** This is more of a debugging function than anything. Use + * numInstsInROB to get the instructions in the ROB unless you are + * double checking that variable. + */ + int countInsts(); + + private: + + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Number of instructions in the ROB. */ + unsigned numEntries; + + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + + /** Iterator pointing to the instruction which is the last instruction + * in the ROB. This may at times be invalid (ie when the ROB is empty), + * however it should never be incorrect. + */ + InstIt_t tail; + + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. + */ + InstIt_t squashIt; + + /** Number of instructions in the ROB. */ + int numInstsInROB; + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Is the ROB done squashing. 
*/ + bool doneSquashing; +}; + +#endif //__CPU_BETA_CPU_ROB_HH__ diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh new file mode 100644 index 000000000..86c4e2db1 --- /dev/null +++ b/cpu/beta_cpu/rob_impl.hh @@ -0,0 +1,286 @@ +#ifndef __ROB_IMPL_HH__ +#define __ROB_IMPL_HH__ + +#include "cpu/beta_cpu/rob.hh" + +template <class Impl> +ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth) + : numEntries(_numEntries), + squashWidth(_squashWidth), + numInstsInROB(0), + squashedSeqNum(0) +{ + doneSquashing = true; +} + +template <class Impl> +void +ROB<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + // Set the tail to the beginning of the CPU instruction list so that + // upon the first instruction being inserted into the ROB, the tail + // iterator can simply be incremented. + tail = cpu->instList.begin(); + + // Set the squash iterator to the end of the instruction list. + squashIt = cpu->instList.end(); +} + +template <class Impl> +int +ROB<Impl>::countInsts() +{ + // Start at 1; if the tail matches cpu->instList.begin(), then there is + // one inst in the ROB. + int return_val = 1; + + // There are quite a few special cases. Do not use this function other + // than for debugging purposes. + if (cpu->instList.begin() == cpu->instList.end()) { + // In this case there are no instructions in the list. The ROB + // must be empty. + return 0; + } else if (tail == cpu->instList.end()) { + // In this case, the tail is not yet pointing to anything valid. + // The ROB must be empty. + return 0; + } + + // Iterate through the ROB from the head to the tail, counting the + // entries. + for (InstIt_t i = cpu->instList.begin(); i != tail; ++i) + { + assert(i != cpu->instList.end()); + ++return_val; + } + + return return_val; + + // Because the head won't be tracked properly until the ROB gets the + // first instruction, and any time that the ROB is empty and has not + // yet gotten the instruction, this function doesn't work. 
+// return numInstsInROB; +} + +template <class Impl> +void +ROB<Impl>::insertInst(DynInstPtr &inst) +{ + // Make sure we have the right number of instructions. + assert(numInstsInROB == countInsts()); + // Make sure the instruction is valid. + assert(inst); + + DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC()); + + // If the ROB is full then exit. + assert(numInstsInROB != numEntries); + + ++numInstsInROB; + + // Increment the tail iterator, moving it one instruction back. + // There is a special case if the ROB was empty prior to this insertion, + // in which case the tail will be pointing at instList.end(). If that + // happens, then reset the tail to the beginning of the list. + if (tail != cpu->instList.end()) { + ++tail; + } else { + tail = cpu->instList.begin(); + } + + // Make sure the tail iterator is actually pointing at the instruction + // added. + assert((*tail) == inst); + + DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB); + +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. +template <class Impl> +void +ROB<Impl>::retireHead() +{ + assert(numInstsInROB == countInsts()); + assert(numInstsInROB > 0); + + DynInstPtr head_inst; + + // Get the head ROB instruction. + head_inst = cpu->instList.front(); + + // Make certain this can retire. + assert(head_inst->readyToCommit()); + + DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, " + "instruction PC %#x, seq num %i\n", head_inst->readPC(), + head_inst->seqNum); + + // Keep track of how many instructions are in the ROB. + --numInstsInROB; + + // Tell CPU to remove the instruction from the list of instructions. + // A special case is needed if the instruction being retired is the + // only instruction in the ROB; otherwise the tail iterator will become + // invalidated. 
+ if (tail == cpu->instList.begin()) { + cpu->removeFrontInst(head_inst); + tail = cpu->instList.end(); + } else { + cpu->removeFrontInst(head_inst); + } +} + +template <class Impl> +bool +ROB<Impl>::isHeadReady() +{ + if (numInstsInROB != 0) { + return cpu->instList.front()->readyToCommit(); + } + + return false; +} + +template <class Impl> +unsigned +ROB<Impl>::numFreeEntries() +{ + assert(numInstsInROB == countInsts()); + + return numEntries - numInstsInROB; +} + +template <class Impl> +void +ROB<Impl>::doSquash() +{ + DPRINTF(ROB, "ROB: Squashing instructions.\n"); + + assert(squashIt != cpu->instList.end()); + + for (int numSquashed = 0; + numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum; + ++numSquashed) + { + // Ensure that the instruction is younger. + assert((*squashIt)->seqNum > squashedSeqNum); + + DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n", + (*squashIt)->readPC(), (*squashIt)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*squashIt)->setSquashed(); + + (*squashIt)->setCanCommit(); + + // Special case for when squashing due to a syscall. It's possible + // that the squash happened after the head instruction was already + // committed, meaning that (*squashIt)->seqNum != squashedSeqNum + // will never be false. Normally the squash would never be able + // to go past the head of the ROB; in this case it might, so it + // must be handled otherwise it will segfault. +#ifndef FULL_SYSTEM + if (squashIt == cpu->instList.begin()) { + DPRINTF(ROB, "ROB: Reached head of instruction list while " + "squashing.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + + return; + } +#endif + + // Move the tail iterator to the next instruction. + squashIt--; + } + + + // Check if ROB is done squashing. 
+ if ((*squashIt)->seqNum == squashedSeqNum) { + DPRINTF(ROB, "ROB: Done squashing instructions.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + } +} + +template <class Impl> +void +ROB<Impl>::squash(InstSeqNum squash_num) +{ + DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n"); + doneSquashing = false; + + squashedSeqNum = squash_num; + + assert(tail != cpu->instList.end()); + + squashIt = tail; + + doSquash(); +} + +template <class Impl> +uint64_t +ROB<Impl>::readHeadPC() +{ + assert(numInstsInROB == countInsts()); + + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->readPC(); +} + +template <class Impl> +uint64_t +ROB<Impl>::readHeadNextPC() +{ + assert(numInstsInROB == countInsts()); + + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->readNextPC(); +} + +template <class Impl> +InstSeqNum +ROB<Impl>::readHeadSeqNum() +{ + // Return the sequence number of the instruction at the front of the + // CPU's instruction list, which this ROB treats as its head. Unlike + // the other head accessors, no instruction-count check is done here. + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->seqNum; +} + +template <class Impl> +uint64_t +ROB<Impl>::readTailPC() +{ + assert(numInstsInROB == countInsts()); + + assert(tail != cpu->instList.end()); + + return (*tail)->readPC(); +} + +template <class Impl> +InstSeqNum +ROB<Impl>::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. 
+ return (*tail)->seqNum; +} + +#endif // __ROB_IMPL_HH__ diff --git a/cpu/beta_cpu/store_set.cc b/cpu/beta_cpu/store_set.cc new file mode 100644 index 000000000..a5458685d --- /dev/null +++ b/cpu/beta_cpu/store_set.cc @@ -0,0 +1,254 @@ +#include "cpu/beta_cpu/store_set.hh" +#include "base/trace.hh" + +StoreSet::StoreSet(int _SSIT_size, int _LFST_size) + : SSIT_size(_SSIT_size), LFST_size(_LFST_size) +{ + DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", + SSIT_size, LFST_size); + + SSIT = new SSID[SSIT_size]; + + validSSIT.resize(SSIT_size); + + for (int i = 0; i < SSIT_size; ++i) + validSSIT[i] = false; + + LFST = new InstSeqNum[LFST_size]; + + validLFST.resize(LFST_size); + + SSCounters = new int[LFST_size]; + + for (int i = 0; i < LFST_size; ++i) + { + validLFST[i] = false; + SSCounters[i] = 0; + } + + index_mask = SSIT_size - 1; + + offset_bits = 2; +} + +void +StoreSet::violation(Addr store_PC, Addr load_PC) +{ + int load_index = calcIndex(load_PC); + int store_index = calcIndex(store_PC); + + assert(load_index < SSIT_size && store_index < SSIT_size); + + bool valid_load_SSID = validSSIT[load_index]; + bool valid_store_SSID = validSSIT[store_index]; + + if (!valid_load_SSID && !valid_store_SSID) { + // Calculate a new SSID here. 
+ SSID new_set = calcSSID(load_PC); + + validSSIT[load_index] = true; + + SSIT[load_index] = new_set; + + validSSIT[store_index] = true; + + SSIT[store_index] = new_set; + + assert(new_set < LFST_size); + + SSCounters[new_set]++; + + + DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid " + "storeset, creating a new one: %i for load %#x, store %#x\n", + new_set, load_PC, store_PC); + } else if (valid_load_SSID && !valid_store_SSID) { + SSID load_SSID = SSIT[load_index]; + + validSSIT[store_index] = true; + + SSIT[store_index] = load_SSID; + + assert(load_SSID < LFST_size); + + SSCounters[load_SSID]++; + + DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding " + "store to that set: %i for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else if (!valid_load_SSID && valid_store_SSID) { + SSID store_SSID = SSIT[store_index]; + + validSSIT[load_index] = true; + + SSIT[load_index] = store_SSID; + + // Because we are having a load point to an already existing set, + // the size of the store set is not incremented. + + DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for " + "load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } else { + SSID load_SSID = SSIT[load_index]; + SSID store_SSID = SSIT[store_index]; + + assert(load_SSID < LFST_size && store_SSID < LFST_size); + + int load_SS_size = SSCounters[load_SSID]; + int store_SS_size = SSCounters[store_SSID]; + + // If the load has the bigger store set, then assign the store + // to the same store set as the load. Otherwise vice-versa. 
+ if (load_SS_size > store_SS_size) { + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + SSCounters[store_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; " + "for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else { + SSIT[load_index] = store_SSID; + + SSCounters[store_SSID]++; + SSCounters[load_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; " + "for load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } + } +} + +void +StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) +{ + // Does nothing. + return; +} + +void +StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) +{ + int index = calcIndex(store_PC); + + int store_SSID; + + assert(index < SSIT_size); + + if (!validSSIT[index]) { + // Do nothing if there's no valid entry. + return; + } else { + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // Update the last store that was fetched with the current one. + LFST[store_SSID] = store_seq_num; + + validLFST[store_SSID] = 1; + + DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n", + store_PC, store_SSID); + } +} + +InstSeqNum +StoreSet::checkInst(Addr PC) +{ + int index = calcIndex(PC); + + int inst_SSID; + + assert(index < SSIT_size); + + if (!validSSIT[index]) { + DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n", + PC, index); + + // Return 0 if there's no valid entry. + return 0; + } else { + inst_SSID = SSIT[index]; + + assert(inst_SSID < LFST_size); + + if (!validLFST[inst_SSID]) { + + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no " + "dependency\n", PC, index, inst_SSID); + + return 0; + } else { + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST " + "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]); + + return LFST[inst_SSID]; + } + } +} + +void +StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) +{ + // This only is updated upon a store being issued. 
+ if (!is_store) { + return; + } + + int index = calcIndex(issued_PC); + + int store_SSID; + + assert(index < SSIT_size); + + // Make sure the SSIT still has a valid entry for the issued store. + if (!validSSIT[index]) { + return; + } + + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // If the last fetched store in the store set refers to the store that + // was just issued, then invalidate the entry. + if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n"); + validLFST[store_SSID] = false; + } +} + +void +StoreSet::squash(InstSeqNum squashed_num) +{ + // Drop LFST entries for stores younger than squashed_num (larger + // sequence numbers): those stores are the ones being squashed. The LFST + // is small, so a linear scan is okay; invalid entries are skipped. + + DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n", + squashed_num); + + for (int i = 0; i < LFST_size; ++i) { + if (validLFST[i] && LFST[i] > squashed_num) { + validLFST[i] = false; + } + } +} + +void +StoreSet::clear() +{ + for (int i = 0; i < SSIT_size; ++i) { + validSSIT[i] = false; + } + + for (int i = 0; i < LFST_size; ++i) { + validLFST[i] = false; + } +} + diff --git a/cpu/beta_cpu/store_set.hh b/cpu/beta_cpu/store_set.hh new file mode 100644 index 000000000..b634a180d --- /dev/null +++ b/cpu/beta_cpu/store_set.hh @@ -0,0 +1,58 @@ +#ifndef __STORE_SET_HH__ +#define __STORE_SET_HH__ + +#include <vector> + +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +class StoreSet +{ + public: + typedef unsigned SSID; + + public: + StoreSet(int SSIT_size, int LFST_size); + + void violation(Addr store_PC, Addr load_PC); + + void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + + void insertStore(Addr store_PC, InstSeqNum store_seq_num); + + InstSeqNum checkInst(Addr PC); + + void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + + void squash(InstSeqNum squashed_num); + + void clear(); + + private: + inline int 
calcIndex(Addr PC) + { return (PC >> offset_bits) & index_mask; } + + inline SSID calcSSID(Addr PC) + { return ((PC ^ (PC >> 10)) % LFST_size); } + + SSID *SSIT; + + std::vector<bool> validSSIT; + + InstSeqNum *LFST; + + std::vector<bool> validLFST; + + int *SSCounters; + + int SSIT_size; + + int LFST_size; + + int index_mask; + + // HACK: Hardcoded for now. + int offset_bits; +}; + +#endif // __STORE_SET_HH__ diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc new file mode 100644 index 000000000..53a11326a --- /dev/null +++ b/cpu/beta_cpu/tournament_pred.cc @@ -0,0 +1,243 @@ +#include "cpu/beta_cpu/tournament_pred.hh" + +TournamentBP::SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +TournamentBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +TournamentBP::SatCounter::increment() +{ + if (counter < maxVal) { + ++counter; + } +} + +void +TournamentBP::SatCounter::decrement() +{ + if (counter > 0) { + --counter; + } +} + +TournamentBP::TournamentBP(unsigned _local_predictor_size, + unsigned _local_ctr_bits, + unsigned _local_history_table_size, + unsigned _local_history_bits, + unsigned _global_predictor_size, + unsigned _global_history_bits, + unsigned _global_ctr_bits, + unsigned _choice_predictor_size, + unsigned _choice_ctr_bits, + unsigned _instShiftAmt) + : local_predictor_size(_local_predictor_size), + local_ctr_bits(_local_ctr_bits), + local_history_table_size(_local_history_table_size), + local_history_bits(_local_history_bits), + global_predictor_size(_global_predictor_size), + global_ctr_bits(_global_ctr_bits), + global_history_bits(_global_history_bits), + choice_predictor_size(_global_predictor_size), + choice_ctr_bits(_choice_ctr_bits), + 
instShiftAmt(_instShiftAmt) +{ + //Should do checks here to make sure sizes are correct (powers of 2) + + //Setup the array of counters for the local predictor + local_ctrs = new SatCounter[local_predictor_size](local_ctr_bits); + //Setup the history table for the local table + local_history_table = new unsigned[local_history_table_size](0); + // Setup the local history mask + localHistoryMask = (1 << local_history_bits) - 1; + + //Setup the array of counters for the global predictor + global_ctrs = new SatCounter[global_predictor_size](global_ctr_bits); + //Clear the global history + global_history = 0; + // Setup the global history mask + globalHistoryMask = (1 << global_history_bits) - 1; + + //Setup the array of counters for the choice predictor + choice_ctrs = new SatCounter[choice_predictor_size](choice_ctr_bits); + + threshold = (1 << (local_ctr_bits - 1)) - 1; + threshold = threshold / 2; +} + +inline +unsigned +TournamentBP::calcLocHistIdx(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & (local_history_table_size - 1); +} + +inline +void +TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +{ + global_history = (global_history << 1) | 1; + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] = + (local_history_table[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +{ + global_history = (global_history << 1); + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] = + (local_history_table[local_history_idx] << 1); +} + +bool +TournamentBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + + uint8_t global_prediction; + uint8_t choice_prediction; + + //Lookup in the local predictor to get its branch prediction + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = 
local_history_table[local_history_idx] + & localHistoryMask; + local_prediction = local_ctrs[local_predictor_idx].read(); + + //Lookup in the global predictor to get its branch prediction + global_prediction = global_ctrs[global_history].read(); + + //Lookup in the choice predictor to see which one to use + choice_prediction = choice_ctrs[global_history].read(); + + //@todo Put a threshold value in for the three predictors that can + // be set through the constructor (so this isn't hard coded). + //Also should put some of this code into functions. + if (choice_prediction > threshold) { + if (global_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + global_ctrs[global_history].increment(); + local_ctrs[local_predictor_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + global_ctrs[global_history].decrement(); + local_ctrs[local_predictor_idx].decrement(); + + return false; + } + } else { + if (local_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + global_ctrs[global_history].increment(); + local_ctrs[local_predictor_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + global_ctrs[global_history].decrement(); + local_ctrs[local_predictor_idx].decrement(); + + return false; + } + } +} + +// Update the branch predictor if it predicted a branch wrong. 
+void +TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +{ + + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + bool local_pred_taken; + + uint8_t global_prediction; + bool global_pred_taken; + + // Load the correct global history into the register. + global_history = correct_gh; + + // Get the local predictor's current prediction, remove the incorrect + // update, and update the local predictor + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = local_history_table[local_history_idx]; + local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; + + local_prediction = local_ctrs[local_predictor_idx].read(); + local_pred_taken = local_prediction > threshold; + + //Get the global predictor's current prediction, and update the + //global predictor + global_prediction = global_ctrs[global_history].read(); + global_pred_taken = global_prediction > threshold; + + //Update the choice predictor to tell it which one was correct + if (local_pred_taken != global_pred_taken) { + //If the local prediction matches the actual outcome, decrement + //the counter. Otherwise increment the counter. 
+ if (local_pred_taken == taken) { + choice_ctrs[global_history].decrement(); + } else { + choice_ctrs[global_history].increment(); + } + } + + if (taken) { + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + local_ctrs[local_predictor_idx].increment(); + global_ctrs[global_history].increment(); + + global_history = (global_history << 1) | 1; + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] |= 1; + } + else { + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + local_ctrs[local_predictor_idx].decrement(); + global_ctrs[global_history].decrement(); + + global_history = (global_history << 1); + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] &= ~1; + } +} diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh new file mode 100644 index 000000000..bf87d753b --- /dev/null +++ b/cpu/beta_cpu/tournament_pred.hh @@ -0,0 +1,160 @@ +#ifndef __TOURNAMENT_PRED_HH__ +#define __TOURNAMENT_PRED_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +class TournamentBP +{ + public: + /** + * Default branch predictor constructor. + */ + TournamentBP(unsigned local_predictor_size, + unsigned local_ctr_bits, + unsigned local_history_table_size, + unsigned local_history_bits, + unsigned global_predictor_size, + unsigned global_history_bits, + unsigned global_ctr_bits, + unsigned choice_predictor_size, + unsigned choice_ctr_bits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. 
+ * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, unsigned global_history, bool taken); + + inline unsigned readGlobalHist() { return global_history; } + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned calcLocHistIdx(Addr &branch_addr); + + inline void updateHistoriesTaken(unsigned local_history_idx); + + inline void updateHistoriesNotTaken(unsigned local_history_idx); + + /** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ + class SatCounter + { + public: + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + uint8_t read() + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; + }; + + /** Local counters. */ + SatCounter *local_ctrs; + + /** Size of the local predictor. */ + unsigned local_predictor_size; + + /** Number of bits of the local predictor's counters. */ + unsigned local_ctr_bits; + + /** Array of local history table entries. */ + unsigned *local_history_table; + + /** Size of the local history table. */ + unsigned local_history_table_size; + + /** Number of bits for each entry of the local history table. + * @todo Doesn't this come from the size of the local predictor? + */ + unsigned local_history_bits; + + /** Mask to get the proper local history. 
*/ + unsigned localHistoryMask; + + + /** Array of counters that make up the global predictor. */ + SatCounter *global_ctrs; + + /** Size of the global predictor. */ + unsigned global_predictor_size; + + /** Number of bits of the global predictor's counters. */ + unsigned global_ctr_bits; + + /** Global history register. */ + unsigned global_history; + + /** Number of bits for the global history. */ + unsigned global_history_bits; + + /** Mask to get the proper global history. */ + unsigned globalHistoryMask; + + + /** Array of counters that make up the choice predictor. */ + SatCounter *choice_ctrs; + + /** Size of the choice predictor (identical to the global predictor). */ + unsigned choice_predictor_size; + + /** Number of bits of the choice predictor's counters. */ + unsigned choice_ctr_bits; + + /** Number of bits to shift the instruction over to get rid of the word + * offset. + */ + unsigned instShiftAmt; + + /** Threshold for the counter value; above the threshold is taken, + * equal to or below the threshold is not taken. + */ + unsigned threshold; +}; + +#endif // __TOURNAMENT_PRED_HH__ |