From 7940c10ace28d5b93a61d4d278e6647e0c497149 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 2 Jun 2006 18:15:20 -0400 Subject: Fixes to get compiling to work. This is mainly fixing up some includes; changing functions within the XCs; changing MemReqPtrs to Requests or Packets where appropriate. Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports to work and handle responses properly. This check-in is so that the merge between m5 and newmem is no longer outstanding. src/SConscript: Need to include FU Pool for new CPU model. I'll try to figure out a cleaner way to handle this in the future. src/base/traceflags.py: Include new traces flags, fix up merge mess up. src/cpu/SConscript: Include the base_dyn_inst.cc as one of othe sources. Don't compile the Ozone CPU for now. src/cpu/base.cc: Remove an extra } from the merge. src/cpu/base_dyn_inst.cc: Fixes to make compiling work. Don't instantiate the OzoneCPU for now. src/cpu/base_dyn_inst.hh: src/cpu/o3/2bit_local_pred.cc: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/btb.hh: src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/free_list.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/regfile.hh: src/cpu/o3/sat_counter.hh: src/cpu/op_class.hh: src/cpu/ozone/cpu.hh: src/cpu/checker/cpu.cc: src/cpu/checker/cpu.hh: src/cpu/checker/exec_context.hh: src/cpu/checker/o3_cpu_builder.cc: src/cpu/ozone/cpu_impl.hh: src/mem/request.hh: src/cpu/o3/fu_pool.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/thread_state.hh: src/cpu/ozone/back_end.hh: src/cpu/ozone/dyn_inst.cc: src/cpu/ozone/dyn_inst.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/inorder_back_end.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/ozone_impl.hh: src/cpu/ozone/thread_state.hh: Fixes to get compiling to work. src/cpu/o3/alpha_cpu.hh: Fixes to get compiling to work. Float reg accessors have changed, as well as MemReqPtrs to RequestPtrs. src/cpu/o3/alpha_dyn_inst_impl.hh: Fixes to get compiling to work. Pass in the packet to the completeAcc function. Fix up syscall function. --HG-- rename : cpu/activity.cc => src/cpu/activity.cc rename : cpu/activity.hh => src/cpu/activity.hh rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh rename : cpu/ozone/inst_queue.cc => src/cpu/ozone/inst_queue.cc rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh rename : cpu/thread_state.hh => src/cpu/thread_state.hh rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8 --- src/cpu/ozone/back_end.hh | 514 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 514 insertions(+) create mode 100644 src/cpu/ozone/back_end.hh (limited to 'src/cpu/ozone/back_end.hh') diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh new file mode 100644 index 000000000..63823363e --- /dev/null +++ b/src/cpu/ozone/back_end.hh @@ -0,0 +1,514 @@ + +#ifndef __CPU_OZONE_BACK_END_HH__ +#define __CPU_OZONE_BACK_END_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/request.hh" +#include "sim/eventq.hh" + +class ExecContext; + +template +class OzoneThreadState; + +template +class BackEnd +{ + public: + typedef OzoneThreadState Thread; + + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + struct SizeStruct { + int size; + }; + + typedef SizeStruct DispatchToIssue; + typedef SizeStruct IssueToExec; + typedef SizeStruct ExecToCommit; + typedef SizeStruct Writeback; + + TimeBuffer d2i; + typename TimeBuffer::wire instsToDispatch; + TimeBuffer i2e; + typename TimeBuffer::wire instsToExecute; + TimeBuffer e2c; + TimeBuffer numInstsToWB; + + TimeBuffer *comm; + typename TimeBuffer::wire toIEW; + typename TimeBuffer::wire fromCommit; + + class InstQueue { + enum queue { + NonSpec, + IQ, + ToBeScheduled, + ReadyList, + ReplayList + }; + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + public: + InstQueue(Params *params); + + std::string name() const; + + void regStats(); + + void setIssueExecQueue(TimeBuffer *i2e_queue); + + void setBE(BackEnd *_be) { be = _be; } + + void insert(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &sn); + + DynInstPtr getReadyInst(); + + void commit(const InstSeqNum &sn) {} + + void squash(const InstSeqNum &sn); + + int wakeDependents(DynInstPtr &inst); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst); + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst); + + void violation(DynInstPtr &inst, DynInstPtr &violation) { } + + bool isFull() { return numInsts >= size; } + + void dumpInsts(); + + private: + bool find(queue q, typename std::list::iterator it); + BackEnd *be; + TimeBuffer *i2e; + typename TimeBuffer::wire numIssued; + typedef typename std::list InstList; + typedef typename std::list::iterator InstListIt; + typedef typename std::priority_queue, pqCompare> ReadyInstQueue; + // Not sure I need the IQ list; it just needs to be a count. + InstList iq; + InstList toBeScheduled; + InstList readyList; + InstList nonSpec; + InstList replayList; + ReadyInstQueue readyQueue; + public: + int size; + int numInsts; + int width; + + Stats::VectorDistribution<> occ_dist; + + Stats::Vector<> inst_count; + Stats::Vector<> peak_inst_count; + Stats::Scalar<> empty_count; + Stats::Scalar<> current_count; + Stats::Scalar<> fullCount; + + Stats::Formula occ_rate; + Stats::Formula avg_residency; + Stats::Formula empty_rate; + Stats::Formula full_rate; + }; + + /** LdWriteback event for a load completion. */ + class LdWritebackEvent : public Event { + private: + /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; + /** Pointer to IEW stage. */ + BackEnd *be; + + public: + /** Constructs a load writeback event. */ + LdWritebackEvent(DynInstPtr &_inst, BackEnd *be); + + /** Processes writeback event. */ + virtual void process(); + /** Returns the description of the writeback event. */ + virtual const char *description(); + }; + + BackEnd(Params *params); + + std::string name() const; + + void regStats(); + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setXC(ExecContext *xc_ptr) + { xc = xc_ptr; } + + void setThreadState(Thread *thread_ptr) + { thread = thread_ptr; } + + void setCommBuffer(TimeBuffer *_comm); + + void tick(); + void squash(); + void squashFromXC(); + bool xcSquash; + + template + Fault read(RequestPtr req, T &data, int load_idx); + + template + Fault write(RequestPtr req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return numInsts >= numROBEntries; } + bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst) + { IQ.rescheduleMemInst(inst); } + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst) + { IQ.replayMemInst(inst); } + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst) + { IQ.completeMemInst(inst); } + + void fetchFault(Fault &fault); + + private: + void updateStructures(); + void dispatchInsts(); + void dispatchStall(); + void checkDispatchStatus(); + void scheduleReadyInsts(); + void executeInsts(); + void commitInsts(); + void addToIQ(DynInstPtr &inst); + void addToLSQ(DynInstPtr &inst); + void instToCommit(DynInstPtr &inst); + void writebackInsts(); + bool commitInst(int inst_num); + void squash(const InstSeqNum &sn); + void squashDueToBranch(DynInstPtr &inst); + void squashDueToMemBlocked(DynInstPtr &inst); + void updateExeInstStats(DynInstPtr &inst); + void updateComInstStats(DynInstPtr &inst); + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + Thread *thread; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + Status dispatchStatus; + + Counter funcExeInst; + + private: +// typedef typename Impl::InstQueue InstQueue; + + InstQueue IQ; + + typedef typename Impl::LdstQueue LdstQueue; + + LdstQueue LSQ; + public: + RenameTable commitRenameTable; + + RenameTable renameTable; + private: + class DCacheCompletionEvent : public Event + { + private: + BackEnd *be; + + public: + DCacheCompletionEvent(BackEnd *_be); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + Request *memReq; + + // General back end width. Used if the more specific isn't given. + int width; + + // Dispatch width. + int dispatchWidth; + int numDispatchEntries; + int dispatchSize; + + int issueWidth; + + // Writeback width + int wbWidth; + + // Commit width + int commitWidth; + + /** Index into queue of instructions being written back. */ + unsigned wbNumInst; + + /** Cycle number within the queue of instructions being written + * back. Used in case there are too many instructions writing + * back at the current cycle and writesbacks need to be scheduled + * for the future. See comments in instToCommit(). + */ + unsigned wbCycle; + + int numROBEntries; + int numInsts; + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + + private: + typedef typename std::list::iterator InstListIt; + + std::list instList; + std::list dispatch; + std::list writeback; + + int latency; + + int squashLatency; + + bool exactFullStall; + + bool fetchRedirect[Impl::MaxThreads]; + + // number of cycles stalled for D-cache misses +/* Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +*/ + Stats::Vector<> rob_cap_events; + Stats::Vector<> rob_cap_inst_count; + Stats::Vector<> iq_cap_events; + Stats::Vector<> iq_cap_inst_count; + // total number of instructions executed + Stats::Vector<> exe_inst; + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_loads; + Stats::Vector<> exe_branches; + + Stats::Vector<> issued_ops; + + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; + + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::VectorDistribution<> queue_res_dist; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula misspec_cnt; + Stats::Formula misspec_ipc; + Stats::Formula issue_rate; + Stats::Formula issue_stores; + Stats::Formula issue_op_rate; + Stats::Formula fu_busy_rate; + Stats::Formula commit_stores; + Stats::Formula commit_ipc; + Stats::Formula commit_ipb; + Stats::Formula lsq_inv_rate; +*/ + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Distribution<> n_committed_dist; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; + + Stats::Scalar<> ROB_fcount; + Stats::Formula ROB_full_rate; + + Stats::Vector<> ROB_count; // cumulative ROB occupancy + Stats::Formula ROB_occ_rate; + Stats::VectorDistribution<> ROB_occ_dist; + public: + void dumpInsts(); +}; + +template +template +Fault +BackEnd::read(RequestPtr req, T &data, int load_idx) +{ +/* memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. + --funcExeInst; + + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } else { + // do functional access + fault = thread->mem->read(memReq, data); + + } + } +*/ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return LSQ.read(req, data, load_idx); +} + +template +template +Fault +BackEnd::write(RequestPtr req, T &data, int store_idx) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; + memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; + */ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return LSQ.write(req, data, store_idx); +} + +#endif // __CPU_OZONE_BACK_END_HH__ -- cgit v1.2.3